Use of org.apache.druid.segment.BaseProgressIndicator in project druid by druid-io.
The example is the mergeAndPush method of the class BatchAppenderator; a short sketch isolating the BaseProgressIndicator call follows the listing.
/**
 * Merge segment, push to deep storage. Should only be used on segments that have been fully persisted.
 *
 * @param identifier sink identifier
 * @param sink sink to push
 * @return segment descriptor, or null if the sink is no longer valid
 */
@Nullable
private DataSegment mergeAndPush(final SegmentIdWithShardSpec identifier, final Sink sink) {
  // Use a descriptor file to indicate that pushing has completed.
  final File persistDir = computePersistDir(identifier);
  final File mergedTarget = new File(persistDir, "merged");
  final File descriptorFile = computeDescriptorFile(identifier);

  // Sanity checks
  if (sink.isWritable()) {
    throw new ISE("Expected sink to be no longer writable before mergeAndPush for segment[%s].", identifier);
  }
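
  // Every hydrant must already have been persisted (swapped to an on-disk segment) before merging;
  // count them so the total can be cross-checked against the sink metadata below.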
  int numHydrants = 0;
  for (FireHydrant hydrant : sink) {
    if (!hydrant.hasSwapped()) {
      throw new ISE("Expected sink to be fully persisted before mergeAndPush for segment[%s].", identifier);
    }
    numHydrants++;
  }

  SinkMetadata sm = sinksMetadata.get(identifier);
  if (sm == null) {
    log.warn("Sink metadata not found just before merge for identifier [%s]", identifier);
  } else if (numHydrants != sm.getNumHydrants()) {
    throw new ISE(
        "Number of restored hydrants[%d] for identifier[%s] does not match expected value[%d]",
        numHydrants,
        identifier,
        sm.getNumHydrants()
    );
  }
  try {
    if (descriptorFile.exists()) {
      // Already pushed.
      log.info("Segment[%s] already pushed, skipping.", identifier);
      return objectMapper.readValue(descriptorFile, DataSegment.class);
    }
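
    // Clear any leftover merge output from a previous attempt so the merge starts from an empty directory.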
    removeDirectory(mergedTarget);
    if (mergedTarget.exists()) {
      throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
    }

    final File mergedFile;
    final long mergeFinishTime;
    final long startTime = System.nanoTime();
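
    // Collect the QueryableIndex behind each persisted hydrant; the Closer releases the segment
    // references once the merge has finished (or failed).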
    List<QueryableIndex> indexes = new ArrayList<>();
    Closer closer = Closer.create();
    try {
      for (FireHydrant fireHydrant : sink) {
        Pair<ReferenceCountingSegment, Closeable> segmentAndCloseable = fireHydrant.getAndIncrementSegment();
        final QueryableIndex queryableIndex = segmentAndCloseable.lhs.asQueryableIndex();
        log.debug("Segment[%s] adding hydrant[%s]", identifier, fireHydrant);
        indexes.add(queryableIndex);
        closer.register(segmentAndCloseable.rhs);
      }

      mergedFile = indexMerger.mergeQueryableIndex(
          indexes,
          schema.getGranularitySpec().isRollup(),
          schema.getAggregators(),
          schema.getDimensionsSpec(),
          mergedTarget,
          tuningConfig.getIndexSpec(),
          tuningConfig.getIndexSpecForIntermediatePersists(),
          new BaseProgressIndicator(),
          tuningConfig.getSegmentWriteOutMediumFactory(),
          tuningConfig.getMaxColumnsToMerge()
      );

      mergeFinishTime = System.nanoTime();
      log.debug("Segment[%s] built in %,dms.", identifier, (mergeFinishTime - startTime) / 1000000);
    } catch (Throwable t) {
      throw closer.rethrow(t);
    } finally {
      closer.close();
    }
    // Retry pushing segments because uploading to deep storage might fail, especially for cloud storage types.
    final DataSegment segment = RetryUtils.retry(
        // This appenderator is used only for the local indexing task, so unique paths are not required.
        () -> dataSegmentPusher.push(
            mergedFile,
            sink.getSegment().withDimensions(
                IndexMerger.getMergedDimensionsFromQueryableIndexes(indexes, schema.getDimensionsSpec())
            ),
            false
        ),
        exception -> exception instanceof Exception,
        5
    );

    // Drop the queryable indexes behind the hydrants; they are not needed anymore, and keeping them
    // around can generate OOMs during merge if enough of them are held back...
    for (FireHydrant fireHydrant : sink) {
      fireHydrant.swapSegment(null);
    }

    // Cleanup: the sink is no longer needed, so remove its persist directory.
    removeDirectory(computePersistDir(identifier));

    final long pushFinishTime = System.nanoTime();
    log.info(
        "Segment[%s] of %,d bytes "
        + "built from %d incremental persist(s) in %,dms; "
        + "pushed to deep storage in %,dms. "
        + "Load spec is: %s",
        identifier,
        segment.getSize(),
        indexes.size(),
        (mergeFinishTime - startTime) / 1000000,
        (pushFinishTime - mergeFinishTime) / 1000000,
        objectMapper.writeValueAsString(segment.getLoadSpec())
    );

    return segment;
  } catch (Exception e) {
    metrics.incrementFailedHandoffs();
    log.warn(e, "Failed to push merged index for segment[%s].", identifier);
    throw new RuntimeException(e);
  }
}
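
Because this page is indexed under BaseProgressIndicator, the sketch below pulls just the merge call out of the method above. It is an illustration, not Druid source: it assumes the same fields and locals that are in scope inside mergeAndPush (indexMerger, indexes, schema, tuningConfig, mergedTarget), and the local name merged is hypothetical. Its only point is where the BaseProgressIndicator instance is handed to IndexMerger.mergeQueryableIndex as the progress callback.

// Sketch only: reuses the fields/locals of mergeAndPush above; "merged" is a hypothetical local name.
final File merged = indexMerger.mergeQueryableIndex(
    indexes,                                             // QueryableIndexes gathered from the sink's persisted hydrants
    schema.getGranularitySpec().isRollup(),              // keep the datasource's rollup setting
    schema.getAggregators(),
    schema.getDimensionsSpec(),
    mergedTarget,                                        // directory that receives the merged segment files
    tuningConfig.getIndexSpec(),
    tuningConfig.getIndexSpecForIntermediatePersists(),
    new BaseProgressIndicator(),                         // progress callback; the class this page documents
    tuningConfig.getSegmentWriteOutMediumFactory(),
    tuningConfig.getMaxColumnsToMerge()
);

BaseProgressIndicator is a ProgressIndicator implementation, so a reporting implementation could be substituted at that position if merge progress needed to be surfaced; BatchAppenderator keeps the basic one.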