Use of org.apache.druid.indexing.common.LockGranularity in project druid by druid-io.
The class CompactionTask, method createIngestionSchema.
/**
* Generate {@link ParallelIndexIngestionSpec} from input segments.
*
* @return an empty list if input segments don't exist; otherwise, a list of generated ingestionSpecs.
*/
@VisibleForTesting
static List<ParallelIndexIngestionSpec> createIngestionSchema(
    final TaskToolbox toolbox,
    final LockGranularity lockGranularityInUse,
    final SegmentProvider segmentProvider,
    final PartitionConfigurationManager partitionConfigurationManager,
    @Nullable final DimensionsSpec dimensionsSpec,
    @Nullable final ClientCompactionTaskTransformSpec transformSpec,
    @Nullable final AggregatorFactory[] metricsSpec,
    @Nullable final ClientCompactionTaskGranularitySpec granularitySpec,
    final CoordinatorClient coordinatorClient,
    final SegmentCacheManagerFactory segmentCacheManagerFactory,
    final RetryPolicyFactory retryPolicyFactory,
    final boolean dropExisting
) throws IOException, SegmentLoadingException {
NonnullPair<Map<DataSegment, File>, List<TimelineObjectHolder<String, DataSegment>>> pair = prepareSegments(toolbox, segmentProvider, lockGranularityInUse);
final Map<DataSegment, File> segmentFileMap = pair.lhs;
final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = pair.rhs;
if (timelineSegments.isEmpty()) {
return Collections.emptyList();
}
// find metadata for interval
// queryableIndexAndSegments is sorted by the interval of the dataSegment
final List<NonnullPair<QueryableIndex, DataSegment>> queryableIndexAndSegments = loadSegments(timelineSegments, segmentFileMap, toolbox.getIndexIO());
final CompactionTuningConfig compactionTuningConfig = partitionConfigurationManager.computeTuningConfig();
if (granularitySpec == null || granularitySpec.getSegmentGranularity() == null) {
// original granularity
final Map<Interval, List<NonnullPair<QueryableIndex, DataSegment>>> intervalToSegments = new TreeMap<>(Comparators.intervalsByStartThenEnd());
queryableIndexAndSegments.forEach(p -> intervalToSegments.computeIfAbsent(p.rhs.getInterval(), k -> new ArrayList<>()).add(p));
// Unify overlapping intervals so that overlapping segments are compacted in the same ingestionSpec.
List<NonnullPair<Interval, List<NonnullPair<QueryableIndex, DataSegment>>>> intervalToSegmentsUnified = new ArrayList<>();
Interval union = null;
List<NonnullPair<QueryableIndex, DataSegment>> segments = new ArrayList<>();
for (Entry<Interval, List<NonnullPair<QueryableIndex, DataSegment>>> entry : intervalToSegments.entrySet()) {
Interval cur = entry.getKey();
if (union == null) {
union = cur;
segments.addAll(entry.getValue());
} else if (union.overlaps(cur)) {
union = Intervals.utc(union.getStartMillis(), Math.max(union.getEndMillis(), cur.getEndMillis()));
segments.addAll(entry.getValue());
} else {
intervalToSegmentsUnified.add(new NonnullPair<>(union, segments));
union = cur;
segments = new ArrayList<>(entry.getValue());
}
}
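// union is non-null here because timelineSegments is non-empty; flush the final run.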
intervalToSegmentsUnified.add(new NonnullPair<>(union, segments));
final List<ParallelIndexIngestionSpec> specs = new ArrayList<>(intervalToSegmentsUnified.size());
for (NonnullPair<Interval, List<NonnullPair<QueryableIndex, DataSegment>>> entry : intervalToSegmentsUnified) {
final Interval interval = entry.lhs;
final List<NonnullPair<QueryableIndex, DataSegment>> segmentsToCompact = entry.rhs;
// If granularitySpec is null, create a new one with segmentGranularity set to the standard
// granularity matching this (already unified) interval. Otherwise, reuse the given
// granularitySpec with its segmentGranularity overridden to that value.
Granularity segmentGranularityToUse = GranularityType.fromPeriod(interval.toPeriod()).getDefaultGranularity();
final DataSchema dataSchema = createDataSchema(
    segmentProvider.dataSource,
    segmentsToCompact,
    dimensionsSpec,
    transformSpec,
    metricsSpec,
    granularitySpec == null
        ? new ClientCompactionTaskGranularitySpec(segmentGranularityToUse, null, null)
        : granularitySpec.withSegmentGranularity(segmentGranularityToUse)
);
specs.add(new ParallelIndexIngestionSpec(dataSchema, createIoConfig(toolbox, dataSchema, interval, coordinatorClient, segmentCacheManagerFactory, retryPolicyFactory, dropExisting), compactionTuningConfig));
}
return specs;
} else {
// given segment granularity
final DataSchema dataSchema = createDataSchema(segmentProvider.dataSource, queryableIndexAndSegments, dimensionsSpec, transformSpec, metricsSpec, granularitySpec);
return Collections.singletonList(new ParallelIndexIngestionSpec(dataSchema, createIoConfig(toolbox, dataSchema, segmentProvider.interval, coordinatorClient, segmentCacheManagerFactory, retryPolicyFactory, dropExisting), compactionTuningConfig));
}
}
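The subtle part of the original-granularity branch is the union pass above: compaction must not split segments whose intervals overlap, so adjacent map entries are merged before per-interval specs are built. Below is a minimal sketch of that pass, reduced to plain intervals; unionOverlapping is an illustrative name, not Druid API, and it assumes the input is sorted by start, which the TreeMap with Comparators.intervalsByStartThenEnd() guarantees.
import java.util.ArrayList;
import java.util.List;
import org.apache.druid.java.util.common.Intervals;
import org.joda.time.Interval;

// Merge a start-sorted list of intervals into non-overlapping unions.
static List<Interval> unionOverlapping(List<Interval> sortedByStart) {
    final List<Interval> unified = new ArrayList<>();
    Interval union = null;
    for (Interval cur : sortedByStart) {
        if (union == null) {
            union = cur;
        } else if (union.overlaps(cur)) {
            // Starts are sorted, so only the end can grow.
            union = Intervals.utc(union.getStartMillis(), Math.max(union.getEndMillis(), cur.getEndMillis()));
        } else {
            unified.add(union);
            union = cur;
        }
    }
    if (union != null) {
        unified.add(union); // flush the final run
    }
    return unified;
}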
Use of org.apache.druid.indexing.common.LockGranularity in project druid by druid-io.
The class SegmentAllocateActionTest, method testResumeSequence.
@Test
public void testResumeSequence() {
final Task task = NoopTask.create();
taskActionTestKit.getTaskLockbox().add(task);
final Map<Integer, SegmentIdWithShardSpec> allocatedPartyTimeIds = new HashMap<>();
final Map<Integer, SegmentIdWithShardSpec> allocatedFutureIds = new HashMap<>();
final SegmentIdWithShardSpec id1 = allocate(task, PARTY_TIME, Granularities.NONE, Granularities.HOUR, "s1", null);
Assert.assertNotNull(id1);
allocatedPartyTimeIds.put(id1.getShardSpec().getPartitionNum(), id1);
final SegmentIdWithShardSpec id2 = allocate(task, THE_DISTANT_FUTURE, Granularities.NONE, Granularities.HOUR, "s1", id1.toString());
Assert.assertNotNull(id2);
allocatedFutureIds.put(id2.getShardSpec().getPartitionNum(), id2);
final SegmentIdWithShardSpec id3 = allocate(task, PARTY_TIME, Granularities.NONE, Granularities.HOUR, "s1", id2.toString());
Assert.assertNotNull(id3);
allocatedPartyTimeIds.put(id3.getShardSpec().getPartitionNum(), id3);
final SegmentIdWithShardSpec id4 = allocate(task, PARTY_TIME, Granularities.NONE, Granularities.HOUR, "s1", id1.toString());
Assert.assertNull(id4);
final SegmentIdWithShardSpec id5 = allocate(task, THE_DISTANT_FUTURE, Granularities.NONE, Granularities.HOUR, "s1", id1.toString());
Assert.assertNotNull(id5);
allocatedFutureIds.put(id5.getShardSpec().getPartitionNum(), id5);
final SegmentIdWithShardSpec id6 = allocate(task, THE_DISTANT_FUTURE, Granularities.NONE, Granularities.MINUTE, "s1", id1.toString());
Assert.assertNull(id6);
final SegmentIdWithShardSpec id7 = allocate(task, THE_DISTANT_FUTURE, Granularities.NONE, Granularities.DAY, "s1", id1.toString());
Assert.assertNotNull(id7);
allocatedFutureIds.put(id7.getShardSpec().getPartitionNum(), id7);
if (lockGranularity == LockGranularity.TIME_CHUNK) {
final TaskLock partyLock = Iterables.getOnlyElement(FluentIterable.from(taskActionTestKit.getTaskLockbox().findLocksForTask(task)).filter(input -> input.getInterval().contains(PARTY_TIME)));
final TaskLock futureLock = Iterables.getOnlyElement(FluentIterable.from(taskActionTestKit.getTaskLockbox().findLocksForTask(task)).filter(input -> input.getInterval().contains(THE_DISTANT_FUTURE)));
assertSameIdentifier(id1, new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), partyLock.getVersion(), new NumberedShardSpec(0, 0)));
assertSameIdentifier(id2, new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(THE_DISTANT_FUTURE), futureLock.getVersion(), new NumberedShardSpec(0, 0)));
assertSameIdentifier(id3, new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), partyLock.getVersion(), new NumberedShardSpec(1, 0)));
} else {
final List<TaskLock> partyLocks = taskActionTestKit.getTaskLockbox().findLocksForTask(task).stream().filter(input -> input.getInterval().contains(PARTY_TIME)).collect(Collectors.toList());
Assert.assertEquals(2, partyLocks.size());
final Map<Integer, SegmentLock> partitionIdToLock = new HashMap<>();
partyLocks.forEach(lock -> {
Assert.assertEquals(LockGranularity.SEGMENT, lock.getGranularity());
final SegmentLock segmentLock = (SegmentLock) lock;
partitionIdToLock.put(segmentLock.getPartitionId(), segmentLock);
});
for (Entry<Integer, SegmentLock> entry : partitionIdToLock.entrySet()) {
assertSameIdentifier(new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), allocatedPartyTimeIds.get(entry.getKey()).getVersion(), new NumberedShardSpec(entry.getValue().getPartitionId(), 0)), allocatedPartyTimeIds.get(entry.getKey()));
}
final List<TaskLock> futureLocks = taskActionTestKit.getTaskLockbox().findLocksForTask(task).stream().filter(input -> input.getInterval().contains(THE_DISTANT_FUTURE)).collect(Collectors.toList());
Assert.assertEquals(1, futureLocks.size());
partitionIdToLock.clear();
futureLocks.forEach(lock -> {
Assert.assertEquals(LockGranularity.SEGMENT, lock.getGranularity());
final SegmentLock segmentLock = (SegmentLock) lock;
partitionIdToLock.put(segmentLock.getPartitionId(), segmentLock);
});
for (Entry<Integer, SegmentLock> entry : partitionIdToLock.entrySet()) {
assertSameIdentifier(new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(THE_DISTANT_FUTURE), allocatedFutureIds.get(entry.getKey()).getVersion(), new NumberedShardSpec(entry.getValue().getPartitionId(), 0)), allocatedFutureIds.get(entry.getKey()));
}
}
Assert.assertNull(id4);
assertSameIdentifier(id2, id5);
Assert.assertNull(id6);
assertSameIdentifier(id2, id7);
}
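A note on why id6 comes back null while id7 resolves to id2: the resumed sequence already lives in an HOUR chunk, a MINUTE bucket cannot cover that chunk, and a DAY bucket can. Under that reading (ours; the test itself does not assert this), the containment is easy to check:
// Sketch: bucket containment behind the id6/id7 results above.
Interval hourBucket = Granularities.HOUR.bucket(THE_DISTANT_FUTURE);
Assert.assertFalse(Granularities.MINUTE.bucket(THE_DISTANT_FUTURE).contains(hourBucket)); // id6: allocation fails
Assert.assertTrue(Granularities.DAY.bucket(THE_DISTANT_FUTURE).contains(hourBucket));     // id7: resolves to the existing id2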
Use of org.apache.druid.indexing.common.LockGranularity in project druid by druid-io.
The class SegmentAllocateActionTest, method testManySegmentsSameInterval.
@Test
public void testManySegmentsSameInterval() {
final Task task = NoopTask.create();
taskActionTestKit.getTaskLockbox().add(task);
final SegmentIdWithShardSpec id1 = allocate(task, PARTY_TIME, Granularities.NONE, Granularities.HOUR, "s1", null);
final SegmentIdWithShardSpec id2 = allocate(task, PARTY_TIME, Granularities.NONE, Granularities.HOUR, "s1", id1.toString());
final SegmentIdWithShardSpec id3 = allocate(task, PARTY_TIME, Granularities.NONE, Granularities.HOUR, "s1", id2.toString());
if (lockGranularity == LockGranularity.TIME_CHUNK) {
final TaskLock partyLock = Iterables.getOnlyElement(FluentIterable.from(taskActionTestKit.getTaskLockbox().findLocksForTask(task)).filter(input -> input.getInterval().contains(PARTY_TIME)));
assertSameIdentifier(id1, new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), partyLock.getVersion(), new NumberedShardSpec(0, 0)));
assertSameIdentifier(id2, new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), partyLock.getVersion(), new NumberedShardSpec(1, 0)));
assertSameIdentifier(id3, new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), partyLock.getVersion(), new NumberedShardSpec(2, 0)));
} else {
final List<TaskLock> partyTimeLocks = taskActionTestKit.getTaskLockbox().findLocksForTask(task).stream().filter(input -> input.getInterval().contains(PARTY_TIME)).collect(Collectors.toList());
Assert.assertEquals(3, partyTimeLocks.size());
assertSameIdentifier(new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), id1.getVersion(), new NumberedShardSpec(0, 0)), id1);
assertSameIdentifier(new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), id1.getVersion(), new NumberedShardSpec(1, 0)), id2);
assertSameIdentifier(new SegmentIdWithShardSpec(DATA_SOURCE, Granularities.HOUR.bucket(PARTY_TIME), id1.getVersion(), new NumberedShardSpec(2, 0)), id3);
}
}
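Both branches check the same invariant from different angles: allocations into one interval share a version and receive consecutive partition numbers starting at zero, with numCorePartitions fixed at 0 for dynamically appended segments. A small helper makes that explicit (a sketch, not part of the real test class):
static void assertConsecutivePartitions(SegmentIdWithShardSpec... ids) {
    for (int i = 0; i < ids.length; i++) {
        // Every id shares the first id's version and gets partition number i.
        Assert.assertEquals(ids[0].getVersion(), ids[i].getVersion());
        Assert.assertEquals(i, ids[i].getShardSpec().getPartitionNum());
    }
}
With it, the three assertions in either branch collapse to assertConsecutivePartitions(id1, id2, id3).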
Use of org.apache.druid.indexing.common.LockGranularity in project druid by druid-io.
The class SinglePhaseParallelIndexingTest, method testRunAndOverwrite.
private void testRunAndOverwrite(@Nullable Interval inputInterval, Granularity secondSegmentGranularity) {
// Ingest all data.
runTestTask(inputInterval, Granularities.DAY, false, Collections.emptyList());
final Collection<DataSegment> allSegments = new HashSet<>(inputInterval == null ? getStorageCoordinator().retrieveAllUsedSegments("dataSource", Segments.ONLY_VISIBLE) : getStorageCoordinator().retrieveUsedSegmentsForInterval("dataSource", inputInterval, Segments.ONLY_VISIBLE));
// Reingest the same data. Each segment should get replaced by a segment with a newer version.
final LockGranularity actualLockGranularity;
if (inputInterval == null) {
actualLockGranularity = LockGranularity.TIME_CHUNK;
} else {
actualLockGranularity = secondSegmentGranularity.equals(Granularities.DAY) ? lockGranularity : LockGranularity.TIME_CHUNK;
}
runOverwriteTask(inputInterval, secondSegmentGranularity, actualLockGranularity);
// Verify that the segment has been replaced.
final Collection<DataSegment> newSegments = inputInterval == null ? getStorageCoordinator().retrieveAllUsedSegments("dataSource", Segments.ONLY_VISIBLE) : getStorageCoordinator().retrieveUsedSegmentsForInterval("dataSource", inputInterval, Segments.ONLY_VISIBLE);
Assert.assertFalse(newSegments.isEmpty());
allSegments.addAll(newSegments);
final VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(allSegments);
final Interval timelineInterval = inputInterval == null ? Intervals.ETERNITY : inputInterval;
final Set<DataSegment> visibles = timeline.findNonOvershadowedObjectsInInterval(timelineInterval, Partitions.ONLY_COMPLETE);
Assert.assertEquals(new HashSet<>(newSegments), visibles);
}
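The actualLockGranularity branch encodes the rule this test exercises. Isolated as a helper (a sketch under the same assumptions; expectedLockGranularity is an illustrative name):
// Segment locks survive only when an explicit interval is given and the second
// run keeps the original DAY segment granularity; any other combination forces
// time-chunk locking.
static LockGranularity expectedLockGranularity(
    @Nullable Interval inputInterval,
    Granularity secondSegmentGranularity,
    LockGranularity configured
) {
    if (inputInterval == null || !secondSegmentGranularity.equals(Granularities.DAY)) {
        return LockGranularity.TIME_CHUNK;
    }
    return configured;
}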
Use of org.apache.druid.indexing.common.LockGranularity in project druid by druid-io.
The class AppenderatorDriverRealtimeIndexTask, method run.
@Override
public TaskStatus run(final TaskToolbox toolbox) {
runThread = Thread.currentThread();
authorizerMapper = toolbox.getAuthorizerMapper();
rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
parseExceptionHandler = new ParseExceptionHandler(rowIngestionMeters, spec.getTuningConfig().isLogParseExceptions(), spec.getTuningConfig().getMaxParseExceptions(), spec.getTuningConfig().getMaxSavedParseExceptions());
setupTimeoutAlert();
DataSchema dataSchema = spec.getDataSchema();
RealtimeAppenderatorTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir());
final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
final TaskRealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics, rowIngestionMeters);
this.metrics = fireDepartmentForMetrics.getMetrics();
final Supplier<Committer> committerSupplier = Committers.nilSupplier();
DiscoveryDruidNode discoveryDruidNode = createDiscoveryDruidNode(toolbox);
appenderator = newAppenderator(dataSchema, tuningConfig, metrics, toolbox);
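// Use a shared lock only if the task context explicitly opts in via Tasks.USE_SHARED_LOCK; default to exclusive.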
TaskLockType lockType = getContextValue(Tasks.USE_SHARED_LOCK, false) ? TaskLockType.SHARED : TaskLockType.EXCLUSIVE;
StreamAppenderatorDriver driver = newDriver(dataSchema, appenderator, toolbox, metrics, lockType);
try {
log.debug("Found chat handler of class[%s]", toolbox.getChatHandlerProvider().getClass().getName());
toolbox.getChatHandlerProvider().register(getId(), this, false);
if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
toolbox.getDataSegmentServerAnnouncer().announce();
toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
}
driver.startJob(segmentId -> {
try {
if (lockGranularity == LockGranularity.SEGMENT) {
return toolbox.getTaskActionClient().submit(new SegmentLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), segmentId.getVersion(), segmentId.getShardSpec().getPartitionNum(), 1000L)).isOk();
} else {
final TaskLock lock = toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), 1000L));
if (lock == null) {
return false;
}
if (lock.isRevoked()) {
throw new ISE("Lock for interval [%s] was revoked.", segmentId.getInterval());
}
return true;
}
} catch (IOException e) {
throw new RuntimeException(e);
}
});
// Set up metrics emission
toolbox.addMonitor(metricsMonitor);
// Delay firehose connection to avoid claiming input resources while the plumber is starting up.
final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
int sequenceNumber = 0;
String sequenceName = makeSequenceName(getId(), sequenceNumber);
final TransactionalSegmentPublisher publisher = (mustBeNullOrEmptyOverwriteSegments, mustBeNullOrEmptyDropSegments, segments, commitMetadata) -> {
if (mustBeNullOrEmptyOverwriteSegments != null && !mustBeNullOrEmptyOverwriteSegments.isEmpty()) {
throw new ISE("Stream ingestion task unexpectedly attempted to overwrite segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyOverwriteSegments));
}
if (mustBeNullOrEmptyDropSegments != null && !mustBeNullOrEmptyDropSegments.isEmpty()) {
throw new ISE("Stream ingestion task unexpectedly attempted to drop segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyDropSegments));
}
final SegmentTransactionalInsertAction action = SegmentTransactionalInsertAction.appendAction(segments, null, null);
return toolbox.getTaskActionClient().submit(action);
};
// Skip connecting firehose if we've been stopped before we got started.
synchronized (this) {
if (!gracefullyStopped) {
firehose = firehoseFactory.connect(Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"), toolbox.getIndexingTmpDir());
}
}
ingestionState = IngestionState.BUILD_SEGMENTS;
// Time to read data!
while (!gracefullyStopped && firehoseDrainableByClosing && firehose.hasMore()) {
try {
InputRow inputRow = firehose.nextRow();
if (inputRow == null) {
log.debug("Discarded null row, considering thrownAway.");
rowIngestionMeters.incrementThrownAway();
} else {
AppenderatorDriverAddResult addResult = driver.add(inputRow, sequenceName, committerSupplier);
if (addResult.isOk()) {
final boolean isPushRequired = addResult.isPushRequired(tuningConfig.getPartitionsSpec().getMaxRowsPerSegment(), tuningConfig.getPartitionsSpec().getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS));
if (isPushRequired) {
publishSegments(driver, publisher, committerSupplier, sequenceName);
sequenceNumber++;
sequenceName = makeSequenceName(getId(), sequenceNumber);
}
} else {
// If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp());
}
}
} catch (ParseException e) {
handleParseException(e);
}
}
ingestionState = IngestionState.COMPLETED;
if (!gracefullyStopped) {
synchronized (this) {
if (gracefullyStopped) {
// Someone called stopGracefully after we checked the flag. That's okay, just stop now.
log.info("Gracefully stopping.");
} else {
finishingJob = true;
}
}
if (finishingJob) {
log.info("Finishing job...");
// Publish any remaining segments
publishSegments(driver, publisher, committerSupplier, sequenceName);
waitForSegmentPublishAndHandoff(tuningConfig.getPublishAndHandoffTimeout());
}
} else if (firehose != null) {
log.info("Task was gracefully stopped, will persist data before exiting");
persistAndWait(driver, committerSupplier.get());
}
} catch (Throwable e) {
log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
errorMsg = Throwables.getStackTraceAsString(e);
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
return TaskStatus.failure(getId(), errorMsg);
} finally {
toolbox.getChatHandlerProvider().unregister(getId());
CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn("Failed to close Firehose"));
appenderator.close();
CloseableUtils.closeAndSuppressExceptions(driver, e -> log.warn("Failed to close AppenderatorDriver"));
toolbox.removeMonitor(metricsMonitor);
if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
toolbox.getDataSegmentServerAnnouncer().unannounce();
toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
}
}
log.info("Job done!");
toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
return TaskStatus.success(getId());
}
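The startJob callback is the one place in this task where LockGranularity changes behavior. Pulled out into a standalone helper it reads as follows; this is a sketch using only the calls already present in the task, with the same exclusive lock type and 1000 ms timeout:
static boolean acquireLockFor(TaskToolbox toolbox, LockGranularity lockGranularity, SegmentIdWithShardSpec segmentId) throws IOException {
    if (lockGranularity == LockGranularity.SEGMENT) {
        // Segment lock: scoped to (interval, version, partition).
        return toolbox.getTaskActionClient().submit(new SegmentLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), segmentId.getVersion(), segmentId.getShardSpec().getPartitionNum(), 1000L)).isOk();
    }
    // Time-chunk lock: scoped to the whole interval; must also check revocation.
    final TaskLock lock = toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), 1000L));
    if (lock == null) {
        return false;
    }
    if (lock.isRevoked()) {
        throw new ISE("Lock for interval [%s] was revoked.", segmentId.getInterval());
    }
    return true;
}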