Use of org.apache.druid.segment.realtime.FireHydrant in project druid by druid-io.
From the class StreamAppenderator, method abandonSegment:
private ListenableFuture<?> abandonSegment(final SegmentIdWithShardSpec identifier, final Sink sink, final boolean removeOnDiskData)
{
  // Ensure no future writes will be made to this sink.
  if (sink.finishWriting()) {
    // Decrement this sink's rows from the counters. We only count active sinks so that we don't double decrement,
    // i.e. those that haven't been persisted for *InMemory counters, or pushed to deep storage for the total counter.
    rowsCurrentlyInMemory.addAndGet(-sink.getNumRowsInMemory());
    bytesCurrentlyInMemory.addAndGet(-sink.getBytesInMemory());
    bytesCurrentlyInMemory.addAndGet(-calculateSinkMemoryInUsed(sink));
    for (FireHydrant hydrant : sink) {
      // Decrement memory used by all memory-mapped hydrants.
      if (!hydrant.equals(sink.getCurrHydrant())) {
        bytesCurrentlyInMemory.addAndGet(-calculateMMappedHydrantMemoryInUsed(hydrant));
      }
    }
    totalRows.addAndGet(-sink.getNumRows());
  }

  // Mark this identifier as dropping, so no future push tasks will pick it up.
  droppingSinks.add(identifier);

  // Wait for any outstanding pushes to finish, then abandon the segment inside the persist thread.
  return Futures.transform(
      pushBarrier(),
      new Function<Object, Void>()
      {
        @Nullable
        @Override
        public Void apply(@Nullable Object input)
        {
          if (!sinks.remove(identifier, sink)) {
            log.error("Sink for segment[%s] no longer valid, not abandoning.", identifier);
            return null;
          }
          metrics.setSinkCount(sinks.size());

          if (removeOnDiskData) {
            // Remove this segment from the committed list. This must be done from the persist thread.
            log.debug("Removing commit metadata for segment[%s].", identifier);
            try {
              commitLock.lock();
              final Committed oldCommit = readCommit();
              if (oldCommit != null) {
                writeCommit(oldCommit.without(identifier.toString()));
              }
            }
            catch (Exception e) {
              log.makeAlert(e, "Failed to update committed segments[%s]", schema.getDataSource())
                 .addData("identifier", identifier.toString())
                 .emit();
              throw new RuntimeException(e);
            }
            finally {
              commitLock.unlock();
            }
          }

          // Unannounce the segment.
          try {
            segmentAnnouncer.unannounceSegment(sink.getSegment());
          }
          catch (Exception e) {
            log.makeAlert(e, "Failed to unannounce segment[%s]", schema.getDataSource())
               .addData("identifier", identifier.toString())
               .emit();
          }

          droppingSinks.remove(identifier);
          sinkTimeline.remove(sink.getInterval(), sink.getVersion(), identifier.getShardSpec().createChunk(sink));
          for (FireHydrant hydrant : sink) {
            if (cache != null) {
              cache.close(SinkQuerySegmentWalker.makeHydrantCacheIdentifier(hydrant));
            }
            hydrant.swapSegment(null);
          }

          if (removeOnDiskData) {
            removeDirectory(computePersistDir(identifier));
          }

          log.info("Dropped segment[%s].", identifier);
          return null;
        }
      },
      // starting to abandon segments
      persistExecutor
  );
}
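The interesting part of abandonSegment is the ordering: the in-memory counters are decremented immediately, but the sink itself is removed only on the persist thread, after pushBarrier() resolves, so an in-flight push cannot race with the drop and the hydrants' segments are swapped to null only once nothing else can pick them up. Below is a minimal, self-contained sketch of that barrier-then-cleanup pattern using Guava's ListenableFuture; the executor, the stand-in barrier, and the printed messages are illustrative assumptions, not Druid code.

import com.google.common.base.Function;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;

import java.util.concurrent.Executors;

public class BarrierThenCleanupSketch
{
  public static void main(String[] args) throws Exception
  {
    // Single-threaded executor standing in for the appenderator's persist thread.
    final ListeningExecutorService persistExecutor =
        MoreExecutors.listeningDecorator(Executors.newSingleThreadExecutor());

    // Stand-in for pushBarrier(): completes once previously queued push work has drained.
    final ListenableFuture<String> pushBarrier =
        persistExecutor.submit(() -> "outstanding pushes finished");

    // The cleanup runs on the persist thread only after the barrier completes,
    // mirroring how abandonSegment() defers removing the sink and its hydrants.
    final ListenableFuture<Void> abandoned = Futures.transform(
        pushBarrier,
        new Function<String, Void>()
        {
          @Override
          public Void apply(String input)
          {
            System.out.println("abandoning segment after: " + input);
            return null;
          }
        },
        persistExecutor
    );

    abandoned.get();
    persistExecutor.shutdown();
  }
}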
Use of org.apache.druid.segment.realtime.FireHydrant in project druid by druid-io.
From the class RealtimePlumber, method bootstrapSinksFromDisk:
protected Object bootstrapSinksFromDisk()
{
  final VersioningPolicy versioningPolicy = config.getVersioningPolicy();

  File baseDir = computeBaseDir(schema);
  if (baseDir == null || !baseDir.exists()) {
    return null;
  }

  File[] files = baseDir.listFiles();
  if (files == null) {
    return null;
  }

  Object metadata = null;
  long latestCommitTime = 0;
  for (File sinkDir : files) {
    final Interval sinkInterval = Intervals.of(sinkDir.getName().replace('_', '/'));

    //final File[] sinkFiles = sinkDir.listFiles();
    // To avoid reading and listing of "merged" dir
    final File[] sinkFiles = sinkDir.listFiles(new FilenameFilter() {
      @Override
      public boolean accept(File dir, String fileName)
      {
        return Ints.tryParse(fileName) != null;
      }
    });
    Arrays.sort(sinkFiles, new Comparator<File>() {
      @Override
      public int compare(File o1, File o2)
      {
        try {
          return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName()));
        }
        catch (NumberFormatException e) {
          log.error(e, "Couldn't compare as numbers? [%s][%s]", o1, o2);
          return o1.compareTo(o2);
        }
      }
    });

    boolean isCorrupted = false;
    List<FireHydrant> hydrants = new ArrayList<>();
    for (File segmentDir : sinkFiles) {
      log.info("Loading previously persisted segment at [%s]", segmentDir);

      // If 100% sure that this is not needed, this check can be removed.
      if (Ints.tryParse(segmentDir.getName()) == null) {
        continue;
      }
      QueryableIndex queryableIndex = null;
      try {
        queryableIndex = indexIO.loadIndex(segmentDir);
      }
      catch (IOException e) {
        log.error(e, "Problem loading segmentDir from disk.");
        isCorrupted = true;
      }
      if (isCorrupted) {
        try {
          File corruptSegmentDir = computeCorruptedFileDumpDir(segmentDir, schema);
          log.info("Renaming %s to %s", segmentDir.getAbsolutePath(), corruptSegmentDir.getAbsolutePath());
          org.apache.commons.io.FileUtils.copyDirectory(segmentDir, corruptSegmentDir);
          FileUtils.deleteDirectory(segmentDir);
        }
        catch (Exception e1) {
          log.error(e1, "Failed to rename %s", segmentDir.getAbsolutePath());
        }
        // Note: skipping a corrupted segment might drop some data; this strategy should be revisited
        // at some point.
        continue;
      }

      Metadata segmentMetadata = queryableIndex.getMetadata();
      if (segmentMetadata != null) {
        Object timestampObj = segmentMetadata.get(COMMIT_METADATA_TIMESTAMP_KEY);
        if (timestampObj != null) {
          long timestamp = ((Long) timestampObj).longValue();
          if (timestamp > latestCommitTime) {
            log.info(
                "Found metaData [%s] with latestCommitTime [%s] greater than previous recorded [%s]",
                queryableIndex.getMetadata(), timestamp, latestCommitTime
            );
            latestCommitTime = timestamp;
            metadata = queryableIndex.getMetadata().get(COMMIT_METADATA_KEY);
          }
        }
      }
      hydrants.add(
          new FireHydrant(
              new QueryableIndexSegment(
                  queryableIndex,
                  SegmentId.of(schema.getDataSource(), sinkInterval, versioningPolicy.getVersion(sinkInterval), config.getShardSpec())
              ),
              Integer.parseInt(segmentDir.getName())
          )
      );
    }
    if (hydrants.isEmpty()) {
      // Probably encountered a corrupt sink directory
      log.warn("Found persisted segment directory with no intermediate segments present at %s, skipping sink creation.", sinkDir.getAbsolutePath());
      continue;
    }
    final Sink currSink = new Sink(
        sinkInterval,
        schema,
        config.getShardSpec(),
        versioningPolicy.getVersion(sinkInterval),
        config.getAppendableIndexSpec(),
        config.getMaxRowsInMemory(),
        config.getMaxBytesInMemoryOrDefault(),
        true,
        config.getDedupColumn(),
        hydrants
    );
    addSink(currSink);
  }
  return metadata;
}
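Reconstruction in bootstrapSinksFromDisk depends entirely on the persist-directory layout: each sink directory holds numerically named hydrant directories (alongside a "merged" directory and marker files that must be skipped), and hydrants have to be reloaded in numeric order. The following stand-alone sketch isolates just that filter-and-sort step; the persist path is a hypothetical example, and in the plumber it would come from computeBaseDir(schema).

import com.google.common.primitives.Ints;

import java.io.File;
import java.util.Arrays;
import java.util.Comparator;

public class ListHydrantDirsSketch
{
  public static void main(String[] args)
  {
    // Hypothetical persist directory for one sink (the interval is encoded in the directory name).
    final File sinkDir = new File("/tmp/druid/persist/wikipedia/2024-01-01T00:00:00.000Z_2024-01-01T01:00:00.000Z");

    // Keep only numerically named hydrant directories; this skips "merged" and marker files.
    final File[] hydrantDirs = sinkDir.listFiles((dir, name) -> Ints.tryParse(name) != null);
    if (hydrantDirs == null) {
      System.out.println("not a directory, or I/O error: " + sinkDir);
      return;
    }

    // Sort by hydrant number so segments are reloaded in the order they were persisted.
    Arrays.sort(hydrantDirs, Comparator.comparingInt((File f) -> Integer.parseInt(f.getName())));

    for (File hydrantDir : hydrantDirs) {
      System.out.println("would load hydrant " + hydrantDir.getName() + " from " + hydrantDir);
    }
  }
}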
Use of org.apache.druid.segment.realtime.FireHydrant in project druid by druid-io.
From the class RealtimePlumber, method persistAndMerge:
// Submits persist-n-merge task for a Sink to the mergeExecutor
private void persistAndMerge(final long truncatedTime, final Sink sink)
{
  final String threadName = StringUtils.format("%s-%s-persist-n-merge", schema.getDataSource(), DateTimes.utc(truncatedTime));
  mergeExecutor.execute(
      new ThreadRenamingRunnable(threadName)
      {
        final Interval interval = sink.getInterval();
        Stopwatch mergeStopwatch = null;

        @Override
        public void doRun()
        {
          try {
            // Bail out if this sink has been abandoned by a previously-executed task.
            if (sinks.get(truncatedTime) != sink) {
              log.info("Sink[%s] was abandoned, bailing out of persist-n-merge.", sink);
              return;
            }

            // Use a file to indicate that pushing has completed.
            final File persistDir = computePersistDir(schema, interval);
            final File mergedTarget = new File(persistDir, "merged");
            final File isPushedMarker = new File(persistDir, "isPushedMarker");

            if (!isPushedMarker.exists()) {
              removeSegment(sink, mergedTarget);
              if (mergedTarget.exists()) {
                log.warn("Merged target[%s] still exists after attempt to delete it; skipping push.", mergedTarget);
                return;
              }
            } else {
              log.info("Already pushed sink[%s]", sink);
              return;
            }
            /*
             * Note: if the plumber crashes after persisting a subset of hydrants, data might be duplicated, because
             * those hydrants will be read again while the older commitMetadata is used. Fixing this probably needs
             * structural changes to the plumber.
             */
            for (FireHydrant hydrant : sink) {
              synchronized (hydrant) {
                if (!hydrant.hasSwapped()) {
                  log.info("Hydrant[%s] hasn't swapped yet, swapping. Sink[%s]", hydrant, sink);
                  final int rowCount = persistHydrant(hydrant, schema, interval, null);
                  metrics.incrementRowOutputCount(rowCount);
                }
              }
            }
            final long mergeThreadCpuTime = JvmUtils.safeGetThreadCpuTime();
            mergeStopwatch = Stopwatch.createStarted();

            final File mergedFile;
            List<QueryableIndex> indexes = new ArrayList<>();
            Closer closer = Closer.create();
            try {
              for (FireHydrant fireHydrant : sink) {
                Pair<ReferenceCountingSegment, Closeable> segmentAndCloseable = fireHydrant.getAndIncrementSegment();
                final QueryableIndex queryableIndex = segmentAndCloseable.lhs.asQueryableIndex();
                log.info("Adding hydrant[%s]", fireHydrant);
                indexes.add(queryableIndex);
                closer.register(segmentAndCloseable.rhs);
              }

              mergedFile = indexMerger.mergeQueryableIndex(
                  indexes,
                  schema.getGranularitySpec().isRollup(),
                  schema.getAggregators(),
                  null,
                  mergedTarget,
                  config.getIndexSpec(),
                  config.getIndexSpecForIntermediatePersists(),
                  new BaseProgressIndicator(),
                  config.getSegmentWriteOutMediumFactory(),
                  -1
              );
            }
            catch (Throwable t) {
              throw closer.rethrow(t);
            }
            finally {
              closer.close();
            }

            // emit merge metrics before publishing segment
            metrics.incrementMergeCpuTime(JvmUtils.safeGetThreadCpuTime() - mergeThreadCpuTime);
            metrics.incrementMergeTimeMillis(mergeStopwatch.elapsed(TimeUnit.MILLISECONDS));

            log.info("Pushing [%s] to deep storage", sink.getSegment().getId());

            DataSegment segment = dataSegmentPusher.push(
                mergedFile,
                sink.getSegment().withDimensions(IndexMerger.getMergedDimensionsFromQueryableIndexes(indexes, schema.getDimensionsSpec())),
                false
            );
            log.info("Inserting [%s] to the metadata store", sink.getSegment().getId());
            segmentPublisher.publishSegment(segment);

            if (!isPushedMarker.createNewFile()) {
              log.makeAlert("Failed to create marker file for [%s]", schema.getDataSource())
                 .addData("interval", sink.getInterval())
                 .addData("partitionNum", segment.getShardSpec().getPartitionNum())
                 .addData("marker", isPushedMarker)
                 .emit();
            }
          }
          catch (Exception e) {
            metrics.incrementFailedHandoffs();
            log.makeAlert(e, "Failed to persist merged index[%s]", schema.getDataSource())
               .addData("interval", interval)
               .emit();
            if (shuttingDown) {
              // We're trying to shut down, and this segment failed to push. Let's just get rid of it.
              // This call will also delete possibly-partially-written files, so we don't need to do it explicitly.
              cleanShutdown = false;
              abandonSegment(truncatedTime, sink);
            }
          }
          finally {
            if (mergeStopwatch != null) {
              mergeStopwatch.stop();
            }
          }
        }
      }
  );

  handoffNotifier.registerSegmentHandoffCallback(
      new SegmentDescriptor(sink.getInterval(), sink.getVersion(), config.getShardSpec().getPartitionNum()),
      mergeExecutor,
      new Runnable()
      {
        @Override
        public void run()
        {
          abandonSegment(sink.getInterval().getStartMillis(), sink);
          metrics.incrementHandOffCount();
        }
      }
  );
}
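One detail worth calling out in persistAndMerge is how hydrant references are kept alive for the duration of the merge: each getAndIncrementSegment() call returns the segment together with a Closeable that releases the reference, and every Closeable is registered with a Guava Closer so the references are released in the finally block even if mergeQueryableIndex throws. The sketch below shows only that Closer idiom; acquire() and the resource names are placeholders, not Druid APIs.

import com.google.common.io.Closer;

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class CloserIdiomSketch
{
  // Placeholder for fireHydrant.getAndIncrementSegment(): acquire a resource and hand back its release handle.
  static Closeable acquire(String name, List<String> acquired)
  {
    acquired.add(name);
    return () -> System.out.println("released hydrant " + name);
  }

  public static void main(String[] args) throws IOException
  {
    final List<String> acquired = new ArrayList<>();
    final Closer closer = Closer.create();
    try {
      for (String hydrant : new String[]{"0", "1", "2"}) {
        // Register each release handle as soon as the resource is acquired,
        // just as persistAndMerge registers segmentAndCloseable.rhs.
        closer.register(acquire(hydrant, acquired));
      }
      // The merge would happen here; if it throws, rethrow() propagates the error
      // and the finally block still releases every registered handle.
      System.out.println("merging hydrants " + acquired);
    }
    catch (Throwable t) {
      throw closer.rethrow(t);
    }
    finally {
      closer.close();
    }
  }
}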