Use of io.druid.segment.realtime.FireHydrant in project druid by druid-io.
The class AppenderatorImpl, method bootstrapSinksFromDisk.
/**
* Populate "sinks" and "sinkTimeline" with committed segments, and announce them with the segmentAnnouncer.
*
* @return persisted commit metadata
*/
private Object bootstrapSinksFromDisk()
{
  Preconditions.checkState(sinks.isEmpty(), "Already bootstrapped?!");

  final File baseDir = tuningConfig.getBasePersistDirectory();
  if (!baseDir.exists()) {
    return null;
  }

  final File[] files = baseDir.listFiles();
  if (files == null) {
    return null;
  }

  final File commitFile = computeCommitFile();
  final Committed committed;
  try {
    if (commitFile.exists()) {
      committed = objectMapper.readValue(commitFile, Committed.class);
    } else {
      committed = Committed.nil();
    }
  }
  catch (Exception e) {
    throw new ISE(e, "Failed to read commitFile: %s", commitFile);
  }

  log.info("Loading sinks from[%s]: %s", baseDir, committed.getHydrants().keySet());

  for (File sinkDir : files) {
    final File identifierFile = new File(sinkDir, IDENTIFIER_FILE_NAME);
    if (!identifierFile.isFile()) {
      // No identifier in this sinkDir; it must not actually be a sink directory. Skip it.
      continue;
    }

    try {
      final SegmentIdentifier identifier = objectMapper.readValue(identifierFile, SegmentIdentifier.class);
      final int committedHydrants = committed.getCommittedHydrants(identifier.getIdentifierAsString());

      if (committedHydrants <= 0) {
        log.info("Removing uncommitted sink at [%s]", sinkDir);
        FileUtils.deleteDirectory(sinkDir);
        continue;
      }

      // To avoid reading and listing of "merged" dir and other special files,
      // only accept directories whose names are hydrant numbers.
      final File[] sinkFiles = sinkDir.listFiles(
          new FilenameFilter()
          {
            @Override
            public boolean accept(File dir, String fileName)
            {
              return Ints.tryParse(fileName) != null;
            }
          }
      );
      Arrays.sort(
          sinkFiles,
          new Comparator<File>()
          {
            @Override
            public int compare(File o1, File o2)
            {
              return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName()));
            }
          }
      );

      List<FireHydrant> hydrants = Lists.newArrayList();
      for (File hydrantDir : sinkFiles) {
        final int hydrantNumber = Integer.parseInt(hydrantDir.getName());

        if (hydrantNumber >= committedHydrants) {
          log.info("Removing uncommitted segment at [%s]", hydrantDir);
          FileUtils.deleteDirectory(hydrantDir);
        } else {
          log.info("Loading previously persisted segment at [%s]", hydrantDir);
          if (hydrantNumber != hydrants.size()) {
            throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
          }
          hydrants.add(
              new FireHydrant(
                  new QueryableIndexSegment(identifier.getIdentifierAsString(), indexIO.loadIndex(hydrantDir)),
                  hydrantNumber
              )
          );
        }
      }

      // Make sure we loaded enough hydrants.
      if (committedHydrants != hydrants.size()) {
        throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
      }

      Sink currSink = new Sink(
          identifier.getInterval(),
          schema,
          identifier.getShardSpec(),
          identifier.getVersion(),
          tuningConfig.getMaxRowsInMemory(),
          tuningConfig.isReportParseExceptions(),
          hydrants
      );
      sinks.put(identifier, currSink);
      sinkTimeline.add(currSink.getInterval(), currSink.getVersion(), identifier.getShardSpec().createChunk(currSink));

      segmentAnnouncer.announceSegment(currSink.getSegment());
    }
    catch (IOException e) {
      log.makeAlert(e, "Problem loading sink[%s] from disk.", schema.getDataSource())
         .addData("sinkDir", sinkDir)
         .emit();
    }
  }

  // Make sure we loaded all committed sinks.
  final Set<String> loadedSinks = Sets.newHashSet(
      Iterables.transform(
          sinks.keySet(),
          new Function<SegmentIdentifier, String>()
          {
            @Override
            public String apply(SegmentIdentifier input)
            {
              return input.getIdentifierAsString();
            }
          }
      )
  );
  final Set<String> missingSinks = Sets.difference(committed.getHydrants().keySet(), loadedSinks);
  if (!missingSinks.isEmpty()) {
    throw new ISE("Missing committed sinks [%s]", Joiner.on(", ").join(missingSinks));
  }

  return committed.getMetadata();
}
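For orientation, a minimal restart sketch follows. It is not from the Druid source: it assumes an Appenderator built elsewhere and relies on the Appenderator contract, in which startJob() runs this bootstrap and returns the persisted commit metadata (null on a fresh start).

// A minimal sketch, assuming an Appenderator wired up elsewhere (not the
// actual Druid source). In AppenderatorImpl, startJob() is the caller of
// bootstrapSinksFromDisk() and returns its result.
private Object restoreCommitMetadata(Appenderator appenderator)
{
  final Object commitMetadata = appenderator.startJob();
  if (commitMetadata == null) {
    // Fresh start: no commit file in the base persist directory.
    return null;
  }
  // The metadata is whatever the Committer supplied at the last successful
  // persist (e.g. a map of Kafka partition offsets); interpreting it is the
  // ingestion task's responsibility, not the appenderator's.
  return commitMetadata;
}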
Use of io.druid.segment.realtime.FireHydrant in project druid by druid-io.
The class AppenderatorImpl, method mergeAndPush.
/**
* Merge a segment and push it to deep storage. Should only be used on segments that have been fully persisted. Must
* only be run in the single-threaded pushExecutor.
*
* @param identifier sink identifier
* @param sink sink to push
*
* @return segment descriptor, or null if the sink is no longer valid
*/
private DataSegment mergeAndPush(final SegmentIdentifier identifier, final Sink sink)
{
  // Bail out if this sink is null or otherwise not what we expect.
  if (sinks.get(identifier) != sink) {
    log.warn("Sink for segment[%s] no longer valid, bailing out of mergeAndPush.", identifier);
    return null;
  }

  // Use a descriptor file to indicate that pushing has completed.
  final File persistDir = computePersistDir(identifier);
  final File mergedTarget = new File(persistDir, "merged");
  final File descriptorFile = computeDescriptorFile(identifier);

  // Sanity checks
  for (FireHydrant hydrant : sink) {
    if (sink.isWritable()) {
      throw new ISE("WTF?! Expected sink to be no longer writable before mergeAndPush. Segment[%s].", identifier);
    }

    synchronized (hydrant) {
      if (!hydrant.hasSwapped()) {
        throw new ISE("WTF?! Expected sink to be fully persisted before mergeAndPush. Segment[%s].", identifier);
      }
    }
  }

  try {
    if (descriptorFile.exists()) {
      // Already pushed.
      log.info("Segment[%s] already pushed.", identifier);
      return objectMapper.readValue(descriptorFile, DataSegment.class);
    }

    log.info("Pushing merged index for segment[%s].", identifier);
    removeDirectory(mergedTarget);
    if (mergedTarget.exists()) {
      throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
    }

    List<QueryableIndex> indexes = Lists.newArrayList();
    for (FireHydrant fireHydrant : sink) {
      Segment segment = fireHydrant.getSegment();
      final QueryableIndex queryableIndex = segment.asQueryableIndex();
      log.info("Adding hydrant[%s]", fireHydrant);
      indexes.add(queryableIndex);
    }

    final File mergedFile = indexMerger.mergeQueryableIndex(
        indexes,
        schema.getGranularitySpec().isRollup(),
        schema.getAggregators(),
        mergedTarget,
        tuningConfig.getIndexSpec()
    );

    QueryableIndex index = indexIO.loadIndex(mergedFile);

    DataSegment segment = dataSegmentPusher.push(
        mergedFile,
        sink.getSegment().withDimensions(Lists.newArrayList(index.getAvailableDimensions()))
    );

    objectMapper.writeValue(descriptorFile, segment);
    log.info("Pushed merged index for segment[%s], descriptor is: %s", identifier, segment);
    return segment;
  }
  catch (Exception e) {
    metrics.incrementFailedHandoffs();
    log.warn(e, "Failed to push merged index for segment[%s].", identifier);
    throw Throwables.propagate(e);
  }
}
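A caller-side sketch of the public entry point follows. It is not from the Druid source: it assumes an appenderator and committer set up elsewhere, and that Appenderator.push() (the method that ultimately drives mergeAndPush on the pushExecutor) returns a ListenableFuture<SegmentsAndMetadata>.

// A minimal sketch, assuming an Appenderator and Committer built elsewhere.
// The descriptor file written by mergeAndPush makes this safe to retry: an
// already-pushed segment is detected and its descriptor is returned directly.
final ListenableFuture<SegmentsAndMetadata> pushFuture =
    appenderator.push(appenderator.getSegments(), committer);

final SegmentsAndMetadata pushed = pushFuture.get(); // blocking here only for brevity
for (DataSegment dataSegment : pushed.getSegments()) {
  log.info("Pushed segment[%s].", dataSegment.getIdentifier());
}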
Use of io.druid.segment.realtime.FireHydrant in project druid by druid-io.
The class AppenderatorImpl, method persistAll.
@Override
public ListenableFuture<Object> persistAll(final Committer committer)
{
  // Submit persistAll task to the persistExecutor
  final Map<SegmentIdentifier, Integer> commitHydrants = Maps.newHashMap();
  final List<Pair<FireHydrant, SegmentIdentifier>> indexesToPersist = Lists.newArrayList();
  final Set<SegmentIdentifier> identifiers = sinks.keySet();

  for (SegmentIdentifier identifier : identifiers) {
    final Sink sink = sinks.get(identifier);
    final List<FireHydrant> hydrants = Lists.newArrayList(sink);
    commitHydrants.put(identifier, hydrants.size());

    // If the sink is still writable, its in-progress hydrant is excluded here;
    // sink.swap() below rotates it out and queues it for persist.
    final int limit = sink.isWritable() ? hydrants.size() - 1 : hydrants.size();

    for (FireHydrant hydrant : hydrants.subList(0, limit)) {
      if (!hydrant.hasSwapped()) {
        log.info("Hydrant[%s] hasn't persisted yet, persisting. Segment[%s]", hydrant, identifier);
        indexesToPersist.add(Pair.of(hydrant, identifier));
      }
    }

    if (sink.swappable()) {
      indexesToPersist.add(Pair.of(sink.swap(), identifier));
    }
  }

  log.info("Submitting persist runnable for dataSource[%s]", schema.getDataSource());

  final String threadName = String.format("%s-incremental-persist", schema.getDataSource());
  final Object commitMetadata = committer.getMetadata();
  final Stopwatch runExecStopwatch = Stopwatch.createStarted();
  final Stopwatch persistStopwatch = Stopwatch.createStarted();
  final ListenableFuture<Object> future = persistExecutor.submit(
      new ThreadRenamingCallable<Object>(threadName)
      {
        @Override
        public Object doCall()
        {
          try {
            for (Pair<FireHydrant, SegmentIdentifier> pair : indexesToPersist) {
              metrics.incrementRowOutputCount(persistHydrant(pair.lhs, pair.rhs));
            }

            log.info(
                "Committing metadata[%s] for sinks[%s].",
                commitMetadata,
                Joiner.on(", ").join(
                    Iterables.transform(
                        commitHydrants.entrySet(),
                        new Function<Map.Entry<SegmentIdentifier, Integer>, String>()
                        {
                          @Override
                          public String apply(Map.Entry<SegmentIdentifier, Integer> entry)
                          {
                            return String.format("%s:%d", entry.getKey().getIdentifierAsString(), entry.getValue());
                          }
                        }
                    )
                )
            );

            committer.run();
            objectMapper.writeValue(computeCommitFile(), Committed.create(commitHydrants, commitMetadata));

            return commitMetadata;
          }
          catch (Exception e) {
            metrics.incrementFailedPersists();
            throw Throwables.propagate(e);
          }
          finally {
            metrics.incrementNumPersists();
            metrics.incrementPersistTimeMillis(persistStopwatch.elapsed(TimeUnit.MILLISECONDS));
            persistStopwatch.stop();
          }
        }
      }
  );

  final long startDelay = runExecStopwatch.elapsed(TimeUnit.MILLISECONDS);
  metrics.incrementPersistBackPressureMillis(startDelay);
  if (startDelay > WARN_DELAY) {
    log.warn("Ingestion was throttled for [%,d] millis because persists were pending.", startDelay);
  }
  runExecStopwatch.stop();
  resetNextFlush();

  // NB: The rows are still in memory until they're done persisting, but we only count rows in active indexes.
  rowsCurrentlyInMemory.set(0);

  return future;
}
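A caller-side sketch follows; it is not from the Druid source, and the offsets map is illustrative. It exercises the contract visible above: getMetadata() is captured up front, run() executes on the persist thread after all queued hydrants are persisted, and the commit file is written immediately afterwards.

// A minimal sketch; the offsets map stands in for whatever the ingestion task
// checkpoints (e.g. Kafka partition offsets), and appenderator is assumed to
// be built elsewhere.
final Map<Integer, Long> offsets = ImmutableMap.of(0, 1234L, 1, 5678L);
final ListenableFuture<Object> persistFuture = appenderator.persistAll(
    new Committer()
    {
      @Override
      public Object getMetadata()
      {
        return offsets;
      }

      @Override
      public void run()
      {
        // Runs on the persist thread once persistence succeeds; the metadata
        // above becomes durable when the commit file lands right after this.
      }
    }
);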
Use of io.druid.segment.realtime.FireHydrant in project druid by druid-io.
The class RealtimePlumber, method abandonSegment.
/**
* Unannounces a given sink and removes all local references to it. It is important that this is only called
* from the single-threaded mergeExecutor, since otherwise chaos may ensue if merged segments are deleted while
* being created.
*
* @param truncatedTime sink key
* @param sink sink to unannounce
*/
protected void abandonSegment(final long truncatedTime, final Sink sink)
{
  if (sinks.containsKey(truncatedTime)) {
    try {
      segmentAnnouncer.unannounceSegment(sink.getSegment());
      removeSegment(sink, computePersistDir(schema, sink.getInterval()));
      log.info("Removing sinkKey %d for segment %s", truncatedTime, sink.getSegment().getIdentifier());

      sinks.remove(truncatedTime);
      metrics.setSinkCount(sinks.size());
      sinkTimeline.remove(sink.getInterval(), sink.getVersion(), new SingleElementPartitionChunk<>(sink));

      for (FireHydrant hydrant : sink) {
        cache.close(SinkQuerySegmentWalker.makeHydrantCacheIdentifier(hydrant));
      }

      synchronized (handoffCondition) {
        handoffCondition.notifyAll();
      }
    }
    catch (Exception e) {
      log.makeAlert(e, "Unable to abandon old segment for dataSource[%s]", schema.getDataSource())
         .addData("interval", sink.getInterval())
         .emit();
    }
  }
}
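To honor the single-threaded constraint in the javadoc, abandonment is typically scheduled through the merge executor rather than run on the handoff callback's own thread. The sketch below shows that pattern; it is illustrative wiring rather than the exact RealtimePlumber source, and assumes handoffNotifier and mergeExecutor fields are available.

// A minimal sketch: the handoff notifier fires on its own thread, so the
// actual abandonSegment call is submitted to the single-threaded mergeExecutor.
handoffNotifier.registerSegmentHandoffCallback(
    new SegmentDescriptor(sink.getInterval(), sink.getVersion(), sink.getSegment().getShardSpec().getPartitionNum()),
    mergeExecutor,
    new Runnable()
    {
      @Override
      public void run()
      {
        abandonSegment(sink.getInterval().getStartMillis(), sink);
      }
    }
);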
Use of io.druid.segment.realtime.FireHydrant in project druid by druid-io.
The class RealtimePlumberSchoolTest, method testDimOrderInheritanceHelper.
private void testDimOrderInheritanceHelper(final Object commitMetadata) throws Exception
{
  List<List<String>> expectedDims = ImmutableList.<List<String>>of(
      ImmutableList.of("dimD"),
      ImmutableList.of("dimC"),
      ImmutableList.of("dimA"),
      ImmutableList.of("dimB"),
      ImmutableList.of("dimE"),
      ImmutableList.of("dimD", "dimC", "dimA", "dimB", "dimE")
  );

  QueryableIndex qindex;
  FireHydrant hydrant;
  Map<Long, Sink> sinks;

  RealtimePlumber plumber = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
  Assert.assertNull(plumber.startJob());

  final CountDownLatch doneSignal = new CountDownLatch(1);

  final Committer committer = new Committer()
  {
    @Override
    public Object getMetadata()
    {
      return commitMetadata;
    }

    @Override
    public void run()
    {
      doneSignal.countDown();
    }
  };

  plumber.add(getTestInputRowFull("1970-01-01", ImmutableList.of("dimD"), ImmutableList.of("1")), Suppliers.ofInstance(committer));
  plumber.add(getTestInputRowFull("1970-01-01", ImmutableList.of("dimC"), ImmutableList.of("1")), Suppliers.ofInstance(committer));
  plumber.add(getTestInputRowFull("1970-01-01", ImmutableList.of("dimA"), ImmutableList.of("1")), Suppliers.ofInstance(committer));
  plumber.add(getTestInputRowFull("1970-01-01", ImmutableList.of("dimB"), ImmutableList.of("1")), Suppliers.ofInstance(committer));
  plumber.add(getTestInputRowFull("1970-01-01", ImmutableList.of("dimE"), ImmutableList.of("1")), Suppliers.ofInstance(committer));
  plumber.add(
      getTestInputRowFull("1970-01-01", ImmutableList.of("dimA", "dimB", "dimC", "dimD", "dimE"), ImmutableList.of("1")),
      Suppliers.ofInstance(committer)
  );

  plumber.persist(committer);
  doneSignal.await();

  plumber.getSinks().clear();
  plumber.finishJob();

  RealtimePlumber restoredPlumber = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
  restoredPlumber.bootstrapSinksFromDisk();

  sinks = restoredPlumber.getSinks();
  Assert.assertEquals(1, sinks.size());

  List<FireHydrant> hydrants = Lists.newArrayList(sinks.get(0L));
  for (int i = 0; i < hydrants.size(); i++) {
    hydrant = hydrants.get(i);
    qindex = hydrant.getSegment().asQueryableIndex();
    Assert.assertEquals(i, hydrant.getCount());
    Assert.assertEquals(expectedDims.get(i), ImmutableList.copyOf(qindex.getAvailableDimensions()));
  }
}