Use of org.apache.druid.segment.realtime.FireHydrant in project druid by apache.
Class StreamAppenderator, method persistAll.
@Override
public ListenableFuture<Object> persistAll(@Nullable final Committer committer)
{
  throwPersistErrorIfExists();

  final Map<String, Integer> currentHydrants = new HashMap<>();
  final List<Pair<FireHydrant, SegmentIdWithShardSpec>> indexesToPersist = new ArrayList<>();
  int numPersistedRows = 0;
  long bytesPersisted = 0L;
  MutableLong totalHydrantsCount = new MutableLong();
  MutableLong totalHydrantsPersisted = new MutableLong();
  final long totalSinks = sinks.size();

  for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
    final SegmentIdWithShardSpec identifier = entry.getKey();
    final Sink sink = entry.getValue();
    if (sink == null) {
      throw new ISE("No sink for identifier: %s", identifier);
    }
    final List<FireHydrant> hydrants = Lists.newArrayList(sink);
    totalHydrantsCount.add(hydrants.size());
    currentHydrants.put(identifier.toString(), hydrants.size());
    numPersistedRows += sink.getNumRowsInMemory();
    bytesPersisted += sink.getBytesInMemory();

    final int limit = sink.isWritable() ? hydrants.size() - 1 : hydrants.size();

    // gather hydrants that have not been persisted:
    for (FireHydrant hydrant : hydrants.subList(0, limit)) {
      if (!hydrant.hasSwapped()) {
        log.debug("Hydrant[%s] hasn't persisted yet, persisting. Segment[%s]", hydrant, identifier);
        indexesToPersist.add(Pair.of(hydrant, identifier));
        totalHydrantsPersisted.add(1);
      }
    }

    if (sink.swappable()) {
      // It is swappable. Get the old one to persist it and create a new one:
      indexesToPersist.add(Pair.of(sink.swap(), identifier));
      totalHydrantsPersisted.add(1);
    }
  }

  log.debug("Submitting persist runnable for dataSource[%s]", schema.getDataSource());

  final Object commitMetadata = committer == null ? null : committer.getMetadata();
  final Stopwatch runExecStopwatch = Stopwatch.createStarted();
  final Stopwatch persistStopwatch = Stopwatch.createStarted();
  AtomicLong totalPersistedRows = new AtomicLong(numPersistedRows);
  final ListenableFuture<Object> future = persistExecutor.submit(
      new Callable<Object>()
      {
        @Override
        public Object call() throws IOException
        {
          try {
            for (Pair<FireHydrant, SegmentIdWithShardSpec> pair : indexesToPersist) {
              metrics.incrementRowOutputCount(persistHydrant(pair.lhs, pair.rhs));
            }

            if (committer != null) {
              log.debug(
                  "Committing metadata[%s] for sinks[%s].",
                  commitMetadata,
                  Joiner.on(", ").join(
                      currentHydrants.entrySet()
                                     .stream()
                                     .map(entry -> StringUtils.format("%s:%d", entry.getKey(), entry.getValue()))
                                     .collect(Collectors.toList())
                  )
              );
              committer.run();

              try {
                commitLock.lock();
                final Map<String, Integer> commitHydrants = new HashMap<>();
                final Committed oldCommit = readCommit();
                if (oldCommit != null) {
                  // merge current hydrants with existing hydrants
                  commitHydrants.putAll(oldCommit.getHydrants());
                }
                commitHydrants.putAll(currentHydrants);
                writeCommit(new Committed(commitHydrants, commitMetadata));
              }
              finally {
                commitLock.unlock();
              }
            }

            log.info(
                "Flushed in-memory data with commit metadata [%s] for segments: %s",
                commitMetadata,
                indexesToPersist.stream()
                                .map(itp -> itp.rhs.asSegmentId().toString())
                                .distinct()
                                .collect(Collectors.joining(", "))
            );
            log.info(
                "Persisted stats: processed rows: [%d], persisted rows[%d], sinks: [%d], "
                + "total fireHydrants (across sinks): [%d], persisted fireHydrants (across sinks): [%d]",
                rowIngestionMeters.getProcessed(),
                totalPersistedRows.get(),
                totalSinks,
                totalHydrantsCount.longValue(),
                totalHydrantsPersisted.longValue()
            );

            // return null if committer is null
            return commitMetadata;
          }
          catch (IOException e) {
            metrics.incrementFailedPersists();
            throw e;
          }
          finally {
            metrics.incrementNumPersists();
            metrics.incrementPersistTimeMillis(persistStopwatch.elapsed(TimeUnit.MILLISECONDS));
            persistStopwatch.stop();
          }
        }
      }
  );

  final long startDelay = runExecStopwatch.elapsed(TimeUnit.MILLISECONDS);
  metrics.incrementPersistBackPressureMillis(startDelay);
  if (startDelay > WARN_DELAY) {
    log.warn("Ingestion was throttled for [%,d] millis because persists were pending.", startDelay);
  }
  runExecStopwatch.stop();
  resetNextFlush();

  // NB: The rows are still in memory until they're done persisting, but we only count rows in active indexes.
  rowsCurrentlyInMemory.addAndGet(-numPersistedRows);
  bytesCurrentlyInMemory.addAndGet(-bytesPersisted);

  log.info("Persisted rows[%,d] and (estimated) bytes[%,d]", numPersistedRows, bytesPersisted);

  return future;
}
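For context, a minimal sketch (not from the Druid source) of how a caller might drive this method: supply a Committer whose metadata carries the offsets to commit, then wait on the returned future. The appenderator reference and the currentOffsets() helper below are hypothetical stand-ins.
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import org.apache.druid.data.input.Committer;
import java.util.Map;

// Hypothetical offsets to commit alongside the persisted data.
final Map<Integer, Long> offsets = currentOffsets(); // hypothetical helper
final Committer committer = new Committer()
{
  @Override
  public Object getMetadata()
  {
    return offsets; // becomes commitMetadata inside persistAll
  }

  @Override
  public void run()
  {
    // Invoked on the persist thread after all hydrants have been persisted.
  }
};
final ListenableFuture<Object> persistFuture = appenderator.persistAll(committer);
final Object committedMetadata = Futures.getUnchecked(persistFuture); // or attach a callback instead of blocking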
Use of org.apache.druid.segment.realtime.FireHydrant in project druid by apache.
Class StreamAppenderator, method abandonSegment.
private ListenableFuture<?> abandonSegment(
    final SegmentIdWithShardSpec identifier,
    final Sink sink,
    final boolean removeOnDiskData
)
{
  // Ensure no future writes will be made to this sink.
  if (sink.finishWriting()) {
    // Decrement this sink's rows from the counters. We only count active sinks so that we don't double decrement,
    // i.e. those that haven't been persisted for the *InMemory counters, or pushed to deep storage for the total counter.
    rowsCurrentlyInMemory.addAndGet(-sink.getNumRowsInMemory());
    bytesCurrentlyInMemory.addAndGet(-sink.getBytesInMemory());
    bytesCurrentlyInMemory.addAndGet(-calculateSinkMemoryInUsed(sink));
    for (FireHydrant hydrant : sink) {
      // Decrement memory used by each memory-mapped hydrant (the current hydrant is not mapped yet).
      if (!hydrant.equals(sink.getCurrHydrant())) {
        bytesCurrentlyInMemory.addAndGet(-calculateMMappedHydrantMemoryInUsed(hydrant));
      }
    }
    totalRows.addAndGet(-sink.getNumRows());
  }

  // Mark this identifier as dropping, so no future push tasks will pick it up.
  droppingSinks.add(identifier);

  // Wait for any outstanding pushes to finish, then abandon the segment inside the persist thread.
  return Futures.transform(
      pushBarrier(),
      new Function<Object, Void>()
      {
        @Nullable
        @Override
        public Void apply(@Nullable Object input)
        {
          if (!sinks.remove(identifier, sink)) {
            log.error("Sink for segment[%s] no longer valid, not abandoning.", identifier);
            return null;
          }

          metrics.setSinkCount(sinks.size());

          if (removeOnDiskData) {
            // Remove this segment from the committed list. This must be done from the persist thread.
            log.debug("Removing commit metadata for segment[%s].", identifier);
            try {
              commitLock.lock();
              final Committed oldCommit = readCommit();
              if (oldCommit != null) {
                writeCommit(oldCommit.without(identifier.toString()));
              }
            }
            catch (Exception e) {
              log.makeAlert(e, "Failed to update committed segments[%s]", schema.getDataSource())
                 .addData("identifier", identifier.toString())
                 .emit();
              throw new RuntimeException(e);
            }
            finally {
              commitLock.unlock();
            }
          }

          // Unannounce the segment.
          try {
            segmentAnnouncer.unannounceSegment(sink.getSegment());
          }
          catch (Exception e) {
            log.makeAlert(e, "Failed to unannounce segment[%s]", schema.getDataSource())
               .addData("identifier", identifier.toString())
               .emit();
          }

          droppingSinks.remove(identifier);
          sinkTimeline.remove(sink.getInterval(), sink.getVersion(), identifier.getShardSpec().createChunk(sink));
          for (FireHydrant hydrant : sink) {
            if (cache != null) {
              cache.close(SinkQuerySegmentWalker.makeHydrantCacheIdentifier(hydrant));
            }
            hydrant.swapSegment(null);
          }

          if (removeOnDiskData) {
            removeDirectory(computePersistDir(identifier));
          }

          log.info("Dropped segment[%s].", identifier);
          return null;
        }
      },
      // Runs on the persist executor once the push barrier clears, starting to abandon segments.
      persistExecutor
  );
}
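pushBarrier() is not shown in this excerpt; conceptually it yields a future that completes only after all currently queued push work has run, so the transform above executes once outstanding pushes are done. A minimal sketch of that idea, assuming a single-threaded push executor (tasks on such an executor run in submission order, so a no-op submitted now completes after everything queued before it):
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;

// Sketch only: a "barrier" future on a single-threaded executor.
static ListenableFuture<?> barrier(ListeningExecutorService singleThreadedPushExecutor)
{
  return singleThreadedPushExecutor.submit(() -> { /* no-op; completes after all queued tasks */ });
}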
Use of org.apache.druid.segment.realtime.FireHydrant in project druid by apache.
Class BatchAppenderator, method persistAll.
@Override
public ListenableFuture<Object> persistAll(@Nullable final Committer committer)
{
  throwPersistErrorIfExists();
  if (committer != null) {
    throw new ISE("committer must be null for BatchAppenderator");
  }

  // Get ready to persist all sinks:
  final Map<SegmentIdWithShardSpec, Sink> sinksToPersist = swapSinks();

  final Stopwatch runExecStopwatch = Stopwatch.createStarted();
  ListenableFuture<Object> future = persistExecutor.submit(
      () -> {
        log.info("Spawning intermediate persist");

        // figure out hydrants (indices) to persist:
        final List<Pair<FireHydrant, SegmentIdWithShardSpec>> indexesToPersist = new ArrayList<>();
        int numPersistedRows = 0;
        long bytesPersisted = 0;
        int totalHydrantsCount = 0;
        final long totalSinks = sinksToPersist.size();

        for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinksToPersist.entrySet()) {
          final SegmentIdWithShardSpec identifier = entry.getKey();
          final Sink sink = entry.getValue();
          if (sink == null) {
            throw new ISE("No sink for identifier: %s", identifier);
          }

          final List<FireHydrant> hydrants = Lists.newArrayList(sink);
          // Since every time we persist we also get rid of the in-memory references to sink & hydrants,
          // the invariant of exactly one, always swappable, sink with exactly one unpersisted hydrant must hold.
          int totalHydrantsForSink = hydrants.size();
          if (totalHydrantsForSink != 1) {
            throw new ISE(
                "There should be only one hydrant for identifier[%s] but there are[%s]",
                identifier,
                totalHydrantsForSink
            );
          }
          totalHydrantsCount++;
          numPersistedRows += sink.getNumRowsInMemory();
          bytesPersisted += sink.getBytesInMemory();

          if (!sink.swappable()) {
            throw new ISE("Sink is not swappable![%s]", identifier);
          }
          indexesToPersist.add(Pair.of(sink.swap(), identifier));
        }

        if (indexesToPersist.isEmpty()) {
          log.info("No indexes will be persisted");
        }

        final Stopwatch persistStopwatch = Stopwatch.createStarted();
        try {
          for (Pair<FireHydrant, SegmentIdWithShardSpec> pair : indexesToPersist) {
            metrics.incrementRowOutputCount(persistHydrant(pair.lhs, pair.rhs));
          }

          log.info(
              "Persisted in-memory data for segments: %s",
              indexesToPersist.stream()
                              .filter(itp -> itp.rhs != null)
                              .map(itp -> itp.rhs.asSegmentId().toString())
                              .distinct()
                              .collect(Collectors.joining(", "))
          );
          log.info(
              "Persisted stats: processed rows: [%d], persisted rows[%d], persisted sinks: [%d], "
              + "persisted fireHydrants (across sinks): [%d]",
              rowIngestionMeters.getProcessed(),
              numPersistedRows,
              totalSinks,
              totalHydrantsCount
          );
          // note that we do not need to reset sinks metadata since we did it at the start...
        }
        catch (Exception e) {
          metrics.incrementFailedPersists();
          throw e;
        }
        finally {
          metrics.incrementNumPersists();
          long persistMillis = persistStopwatch.elapsed(TimeUnit.MILLISECONDS);
          metrics.incrementPersistTimeMillis(persistMillis);
          persistStopwatch.stop();
          // make sure no push can start while persisting:
          log.info(
              "Persisted rows[%,d] and bytes[%,d] and removed all sinks & hydrants from memory in[%d] millis",
              numPersistedRows,
              bytesPersisted,
              persistMillis
          );
          log.info("Persist is done.");
        }
        return null;
      }
  );

  final long startDelay = runExecStopwatch.elapsed(TimeUnit.MILLISECONDS);
  metrics.incrementPersistBackPressureMillis(startDelay);
  if (startDelay > PERSIST_WARN_DELAY) {
    log.warn("Ingestion was throttled for [%,d] millis because persists were pending.", startDelay);
  }
  runExecStopwatch.stop();

  return future;
}
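A quick usage sketch (hypothetical caller, not from the Druid source): batch ingestion always persists without a committer, since the method above rejects a non-null one.
final ListenableFuture<Object> persistFuture = batchAppenderator.persistAll(null); // batchAppenderator is a hypothetical reference
Futures.getUnchecked(persistFuture); // the future yields null; this throws if the persist failed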
Use of org.apache.druid.segment.realtime.FireHydrant in project druid by apache.
Class BatchAppenderator, method mergeAndPush.
/**
 * Merge segment, push to deep storage. Should only be used on segments that have been fully persisted.
 *
 * @param identifier sink identifier
 * @param sink       sink to push
 * @return segment descriptor, or null if the sink is no longer valid
 */
@Nullable
private DataSegment mergeAndPush(final SegmentIdWithShardSpec identifier, final Sink sink)
{
  // Use a descriptor file to indicate that pushing has completed.
  final File persistDir = computePersistDir(identifier);
  final File mergedTarget = new File(persistDir, "merged");
  final File descriptorFile = computeDescriptorFile(identifier);

  // Sanity checks
  if (sink.isWritable()) {
    throw new ISE("Expected sink to be no longer writable before mergeAndPush for segment[%s].", identifier);
  }

  int numHydrants = 0;
  for (FireHydrant hydrant : sink) {
    if (!hydrant.hasSwapped()) {
      throw new ISE("Expected sink to be fully persisted before mergeAndPush for segment[%s].", identifier);
    }
    numHydrants++;
  }

  SinkMetadata sm = sinksMetadata.get(identifier);
  if (sm == null) {
    log.warn("Sink metadata not found just before merge for identifier [%s]", identifier);
  } else if (numHydrants != sm.getNumHydrants()) {
    throw new ISE(
        "Number of restored hydrants[%d] for identifier[%s] does not match expected value[%d]",
        numHydrants,
        identifier,
        sm.getNumHydrants()
    );
  }

  try {
    if (descriptorFile.exists()) {
      // Already pushed.
      log.info("Segment[%s] already pushed, skipping.", identifier);
      return objectMapper.readValue(descriptorFile, DataSegment.class);
    }

    removeDirectory(mergedTarget);
    if (mergedTarget.exists()) {
      throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
    }

    final File mergedFile;
    final long mergeFinishTime;
    final long startTime = System.nanoTime();
    List<QueryableIndex> indexes = new ArrayList<>();
    Closer closer = Closer.create();
    try {
      for (FireHydrant fireHydrant : sink) {
        Pair<ReferenceCountingSegment, Closeable> segmentAndCloseable = fireHydrant.getAndIncrementSegment();
        final QueryableIndex queryableIndex = segmentAndCloseable.lhs.asQueryableIndex();
        log.debug("Segment[%s] adding hydrant[%s]", identifier, fireHydrant);
        indexes.add(queryableIndex);
        closer.register(segmentAndCloseable.rhs);
      }

      mergedFile = indexMerger.mergeQueryableIndex(
          indexes,
          schema.getGranularitySpec().isRollup(),
          schema.getAggregators(),
          schema.getDimensionsSpec(),
          mergedTarget,
          tuningConfig.getIndexSpec(),
          tuningConfig.getIndexSpecForIntermediatePersists(),
          new BaseProgressIndicator(),
          tuningConfig.getSegmentWriteOutMediumFactory(),
          tuningConfig.getMaxColumnsToMerge()
      );

      mergeFinishTime = System.nanoTime();
      log.debug("Segment[%s] built in %,dms.", identifier, (mergeFinishTime - startTime) / 1000000);
    }
    catch (Throwable t) {
      throw closer.rethrow(t);
    }
    finally {
      closer.close();
    }

    // Retry pushing segments because uploading to deep storage might fail, especially for cloud storage types.
    final DataSegment segment = RetryUtils.retry(
        // This appenderator is used only for the local indexing task, so unique paths are not required.
        () -> dataSegmentPusher.push(
            mergedFile,
            sink.getSegment().withDimensions(
                IndexMerger.getMergedDimensionsFromQueryableIndexes(indexes, schema.getDimensionsSpec())
            ),
            false
        ),
        exception -> exception instanceof Exception,
        5
    );

    // Drop the queryable indexes behind the hydrants; their mapped file references
    // can generate OOMs during merge if enough of them are held back...
    for (FireHydrant fireHydrant : sink) {
      fireHydrant.swapSegment(null);
    }

    // cleanup, sink no longer needed
    removeDirectory(computePersistDir(identifier));

    final long pushFinishTime = System.nanoTime();
    log.info(
        "Segment[%s] of %,d bytes "
        + "built from %d incremental persist(s) in %,dms; "
        + "pushed to deep storage in %,dms. "
        + "Load spec is: %s",
        identifier,
        segment.getSize(),
        indexes.size(),
        (mergeFinishTime - startTime) / 1000000,
        (pushFinishTime - mergeFinishTime) / 1000000,
        objectMapper.writeValueAsString(segment.getLoadSpec())
    );

    return segment;
  }
  catch (Exception e) {
    metrics.incrementFailedHandoffs();
    log.warn(e, "Failed to push merged index for segment[%s].", identifier);
    throw new RuntimeException(e);
  }
}
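The merge loop above relies on Guava's Closer to release the hydrant segment references even when the merge fails. A self-contained sketch of that idiom, using a hypothetical file resource:
import com.google.common.io.Closer;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

void readWithCloser(File file) throws IOException
{
  final Closer closer = Closer.create();
  try {
    final InputStream in = closer.register(new FileInputStream(file)); // hypothetical resource
    // ... use the stream ...
  }
  catch (Throwable t) {
    // rethrow() records t as the primary exception, so a failure in close() cannot mask it.
    throw closer.rethrow(t);
  }
  finally {
    closer.close();
  }
}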
Use of org.apache.druid.segment.realtime.FireHydrant in project druid by apache.
Class BatchAppenderator, method clearSinkMemoryCountersAndDiskStoredData.
// This function does not remove the sink from its tracking map (sinks); the caller is responsible for that,
// because the map is not synchronized and removing elements from it while traversing it
// throws a ConcurrentModificationException.
private void clearSinkMemoryCountersAndDiskStoredData(
    final SegmentIdWithShardSpec identifier,
    final Sink sink,
    final boolean removeOnDiskData
)
{
  // Ensure no future writes will be made to this sink.
  if (sink.finishWriting()) {
    // Decrement this sink's rows from the counters. We only count active sinks so that we don't double decrement,
    // i.e. those that haven't been persisted for the *InMemory counters, or pushed to deep storage for the total counter.
    rowsCurrentlyInMemory -= sink.getNumRowsInMemory();
    bytesCurrentlyInMemory -= sink.getBytesInMemory();
    bytesCurrentlyInMemory -= calculateSinkMemoryInUsed();
    for (FireHydrant hydrant : sink) {
      // Decrement memory used by each memory-mapped hydrant (the current hydrant is not mapped).
      if (!hydrant.equals(sink.getCurrHydrant())) {
        bytesCurrentlyInMemory -= calculateMemoryUsedByHydrant();
      }
    }
    // totalRows is not decremented when removing the sink from memory; the sink was just persisted, and it
    // still "lives", but in hibernation. It will be revived later, just before push.
  }

  if (removeOnDiskData) {
    removeDirectory(computePersistDir(identifier));
  }

  log.info("Removed sink for segment[%s].", identifier);
}
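Given the comment above, one safe caller-side pattern (a sketch, not the actual Druid call site) is to remove entries through the iterator while traversing, which avoids the ConcurrentModificationException:
final Iterator<Map.Entry<SegmentIdWithShardSpec, Sink>> it = sinks.entrySet().iterator();
while (it.hasNext()) {
  final Map.Entry<SegmentIdWithShardSpec, Sink> entry = it.next();
  clearSinkMemoryCountersAndDiskStoredData(entry.getKey(), entry.getValue(), true);
  it.remove(); // removal through the iterator is safe while traversing
}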