Use of org.apache.druid.segment.realtime.FireHydrant in the apache/druid project.
Class RealtimePlumber, method bootstrapSinksFromDisk.
protected Object bootstrapSinksFromDisk() {
  final VersioningPolicy versioningPolicy = config.getVersioningPolicy();
  File baseDir = computeBaseDir(schema);
  if (baseDir == null || !baseDir.exists()) {
    return null;
  }
  File[] files = baseDir.listFiles();
  if (files == null) {
    return null;
  }
  Object metadata = null;
  long latestCommitTime = 0;
  for (File sinkDir : files) {
    final Interval sinkInterval = Intervals.of(sinkDir.getName().replace('_', '/'));
    // Only accept purely numeric directory names, to avoid reading and listing the "merged" dir.
    final File[] sinkFiles = sinkDir.listFiles(new FilenameFilter() {
      @Override
      public boolean accept(File dir, String fileName) {
        return Ints.tryParse(fileName) != null;
      }
    });
    Arrays.sort(sinkFiles, new Comparator<File>() {
      @Override
      public int compare(File o1, File o2) {
        try {
          return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName()));
        } catch (NumberFormatException e) {
          log.error(e, "Couldn't compare as numbers? [%s][%s]", o1, o2);
          return o1.compareTo(o2);
        }
      }
    });
    boolean isCorrupted = false;
    List<FireHydrant> hydrants = new ArrayList<>();
    for (File segmentDir : sinkFiles) {
      log.info("Loading previously persisted segment at [%s]", segmentDir);
      // Defensive re-check of the filename filter above; if 100% sure it is not needed, this check can be removed.
      if (Ints.tryParse(segmentDir.getName()) == null) {
        continue;
      }
      QueryableIndex queryableIndex = null;
      try {
        queryableIndex = indexIO.loadIndex(segmentDir);
      } catch (IOException e) {
        log.error(e, "Problem loading segmentDir from disk.");
        isCorrupted = true;
      }
      if (isCorrupted) {
        try {
          File corruptSegmentDir = computeCorruptedFileDumpDir(segmentDir, schema);
          log.info("Renaming %s to %s", segmentDir.getAbsolutePath(), corruptSegmentDir.getAbsolutePath());
          org.apache.commons.io.FileUtils.copyDirectory(segmentDir, corruptSegmentDir);
          FileUtils.deleteDirectory(segmentDir);
        } catch (Exception e1) {
          log.error(e1, "Failed to rename %s", segmentDir.getAbsolutePath());
        }
        // Note: skipping a corrupted segment may drop some data; this strategy should be revisited
        // at some point.
        continue;
      }
      Metadata segmentMetadata = queryableIndex.getMetadata();
      if (segmentMetadata != null) {
        Object timestampObj = segmentMetadata.get(COMMIT_METADATA_TIMESTAMP_KEY);
        if (timestampObj != null) {
          long timestamp = ((Long) timestampObj).longValue();
          if (timestamp > latestCommitTime) {
            log.info(
                "Found metaData [%s] with latestCommitTime [%s] greater than previous recorded [%s]",
                queryableIndex.getMetadata(),
                timestamp,
                latestCommitTime
            );
            latestCommitTime = timestamp;
            metadata = queryableIndex.getMetadata().get(COMMIT_METADATA_KEY);
          }
        }
      }
      hydrants.add(
          new FireHydrant(
              new QueryableIndexSegment(
                  queryableIndex,
                  SegmentId.of(
                      schema.getDataSource(),
                      sinkInterval,
                      versioningPolicy.getVersion(sinkInterval),
                      config.getShardSpec()
                  )
              ),
              Integer.parseInt(segmentDir.getName())
          )
      );
    }
    if (hydrants.isEmpty()) {
      // Probably encountered a corrupt sink directory
      log.warn(
          "Found persisted segment directory with no intermediate segments present at %s, skipping sink creation.",
          sinkDir.getAbsolutePath()
      );
      continue;
    }
    final Sink currSink = new Sink(
        sinkInterval,
        schema,
        config.getShardSpec(),
        versioningPolicy.getVersion(sinkInterval),
        config.getAppendableIndexSpec(),
        config.getMaxRowsInMemory(),
        config.getMaxBytesInMemoryOrDefault(),
        true,
        config.getDedupColumn(),
        hydrants
    );
    addSink(currSink);
  }
  return metadata;
}
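bootstrapSinksFromDisk recovers each sink's interval from its directory name, where '/' in the interval's ISO-8601 form was swapped for '_' at persist time so it could serve as a file name. A minimal round-trip sketch; the directory name below is illustrative, not taken from the snippet:

```java
import org.apache.druid.java.util.common.Intervals;
import org.joda.time.Interval;

// Illustrative persisted sink directory name: the interval's ISO-8601 string
// with '/' replaced by '_' to make it a legal file name.
String sinkDirName = "2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z";

// The bootstrap path reverses the substitution to recover the interval.
Interval sinkInterval = Intervals.of(sinkDirName.replace('_', '/'));
```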
Use of org.apache.druid.segment.realtime.FireHydrant in the apache/druid project.
Class RealtimePlumber, method abandonSegment.
/**
 * Unannounces a given sink and removes all local references to it. It is important that this is only called
 * from the single-threaded mergeExecutor, since otherwise chaos may ensue if merged segments are deleted while
 * being created.
 *
 * @param truncatedTime sink key
 * @param sink          sink to unannounce
 */
protected void abandonSegment(final long truncatedTime, final Sink sink) {
  if (sinks.containsKey(truncatedTime)) {
    try {
      segmentAnnouncer.unannounceSegment(sink.getSegment());
      removeSegment(sink, computePersistDir(schema, sink.getInterval()));
      log.info("Removing sinkKey %d for segment %s", truncatedTime, sink.getSegment().getId());
      sinks.remove(truncatedTime);
      metrics.setSinkCount(sinks.size());
      sinkTimeline.remove(sink.getInterval(), sink.getVersion(), new SingleElementPartitionChunk<>(sink));
      for (FireHydrant hydrant : sink) {
        cache.close(SinkQuerySegmentWalker.makeHydrantCacheIdentifier(hydrant));
        hydrant.swapSegment(null);
      }
      synchronized (handoffCondition) {
        handoffCondition.notifyAll();
      }
    } catch (Exception e) {
      log.makeAlert(e, "Unable to abandon old segment for dataSource[%s]", schema.getDataSource())
         .addData("interval", sink.getInterval())
         .emit();
    }
  }
}
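The notifyAll at the end of abandonSegment pairs with a thread waiting on the same monitor elsewhere in the plumber. A minimal sketch of that waiting side, assuming a shutdown path that blocks until every sink has been abandoned; the method name awaitAllSinksAbandoned and the loop condition are illustrative, not from the source:

```java
// Hedged sketch: the waiting half of the handoffCondition handshake.
// A thread parks here until abandonSegment has removed the last sink
// and invoked handoffCondition.notifyAll().
private void awaitAllSinksAbandoned() throws InterruptedException {
  synchronized (handoffCondition) {
    while (!sinks.isEmpty()) {
      handoffCondition.wait(); // monitor released while waiting, re-acquired on wake-up
    }
  }
}
```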
Use of org.apache.druid.segment.realtime.FireHydrant in the apache/druid project.
Class RealtimePlumberSchoolTest, method testPersistHydrantGapsHelper.
private void testPersistHydrantGapsHelper(final Object commitMetadata) throws Exception {
  Interval testInterval = new Interval(DateTimes.of("1970-01-01"), DateTimes.of("1971-01-01"));
  RealtimePlumber plumber2 = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
  Sink sink = new Sink(
      testInterval,
      schema2,
      tuningConfig.getShardSpec(),
      DateTimes.of("2014-12-01T12:34:56.789").toString(),
      tuningConfig.getAppendableIndexSpec(),
      tuningConfig.getMaxRowsInMemory(),
      tuningConfig.getMaxBytesInMemoryOrDefault(),
      true,
      tuningConfig.getDedupColumn()
  );
  plumber2.getSinks().put(0L, sink);
  Assert.assertNull(plumber2.startJob());
  final CountDownLatch doneSignal = new CountDownLatch(1);
  final Committer committer = new Committer() {
    @Override
    public Object getMetadata() {
      return commitMetadata;
    }

    @Override
    public void run() {
      doneSignal.countDown();
    }
  };
  plumber2.add(getTestInputRow("1970-01-01"), Suppliers.ofInstance(committer));
  plumber2.add(getTestInputRow("1970-02-01"), Suppliers.ofInstance(committer));
  plumber2.add(getTestInputRow("1970-03-01"), Suppliers.ofInstance(committer));
  plumber2.add(getTestInputRow("1970-04-01"), Suppliers.ofInstance(committer));
  plumber2.add(getTestInputRow("1970-05-01"), Suppliers.ofInstance(committer));
  plumber2.persist(committer);
  doneSignal.await();
  plumber2.getSinks().clear();
  plumber2.finishJob();
  File persistDir = plumber2.computePersistDir(schema2, testInterval);

  /* Check that all hydrants were persisted */
  for (int i = 0; i < 5; i++) {
    Assert.assertTrue(new File(persistDir, String.valueOf(i)).exists());
  }

  /* Create some gaps in the persisted hydrants and reload */
  FileUtils.deleteDirectory(new File(persistDir, "1"));
  FileUtils.deleteDirectory(new File(persistDir, "3"));
  RealtimePlumber restoredPlumber = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
  restoredPlumber.bootstrapSinksFromDisk();
  Map<Long, Sink> sinks = restoredPlumber.getSinks();
  Assert.assertEquals(1, sinks.size());
  List<FireHydrant> hydrants = Lists.newArrayList(sinks.get(0L));
  DateTime startTime = DateTimes.of("1970-01-01T00:00:00.000Z");
  Interval expectedInterval = new Interval(startTime, DateTimes.of("1971-01-01T00:00:00.000Z"));
  Assert.assertEquals(0, hydrants.get(0).getCount());
  Assert.assertEquals(expectedInterval, hydrants.get(0).getSegmentDataInterval());
  Assert.assertEquals(2, hydrants.get(1).getCount());
  Assert.assertEquals(expectedInterval, hydrants.get(1).getSegmentDataInterval());
  Assert.assertEquals(4, hydrants.get(2).getCount());
  Assert.assertEquals(expectedInterval, hydrants.get(2).getSegmentDataInterval());

  /* Delete all the hydrants and reload, no sink should be created */
  FileUtils.deleteDirectory(new File(persistDir, "0"));
  FileUtils.deleteDirectory(new File(persistDir, "2"));
  FileUtils.deleteDirectory(new File(persistDir, "4"));
  RealtimePlumber restoredPlumber2 = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema2, tuningConfig, metrics);
  restoredPlumber2.bootstrapSinksFromDisk();
  Assert.assertEquals(0, restoredPlumber2.getSinks().size());
}
Use of org.apache.druid.segment.realtime.FireHydrant in the apache/druid project.
Class SinkTest, method testSwap.
@Test
public void testSwap() throws Exception {
  final DataSchema schema = new DataSchema(
      "test",
      new TimestampSpec(null, null, null),
      DimensionsSpec.EMPTY,
      new AggregatorFactory[]{new CountAggregatorFactory("rows")},
      new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
      null
  );
  final Interval interval = Intervals.of("2013-01-01/2013-01-02");
  final String version = DateTimes.nowUtc().toString();
  RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
      null, 100, null, null, new Period("P1Y"), null, null, null, null,
      null, null, null, null, 0, 0, null, null, null, null, null
  );
  final Sink sink = new Sink(
      interval,
      schema,
      tuningConfig.getShardSpec(),
      version,
      tuningConfig.getAppendableIndexSpec(),
      tuningConfig.getMaxRowsInMemory(),
      tuningConfig.getMaxBytesInMemoryOrDefault(),
      true,
      tuningConfig.getDedupColumn()
  );
  sink.add(
      new InputRow() {
        @Override
        public List<String> getDimensions() {
          return new ArrayList<>();
        }

        @Override
        public long getTimestampFromEpoch() {
          return DateTimes.of("2013-01-01").getMillis();
        }

        @Override
        public DateTime getTimestamp() {
          return DateTimes.of("2013-01-01");
        }

        @Override
        public List<String> getDimension(String dimension) {
          return new ArrayList<>();
        }

        @Override
        public Number getMetric(String metric) {
          return 0;
        }

        @Override
        public Object getRaw(String dimension) {
          return null;
        }

        @Override
        public int compareTo(Row o) {
          return 0;
        }
      },
      false
  );
  FireHydrant currHydrant = sink.getCurrHydrant();
  Assert.assertEquals(Intervals.of("2013-01-01/PT1M"), currHydrant.getIndex().getInterval());
  FireHydrant swapHydrant = sink.swap();
  sink.add(
      new InputRow() {
        @Override
        public List<String> getDimensions() {
          return new ArrayList<>();
        }

        @Override
        public long getTimestampFromEpoch() {
          return DateTimes.of("2013-01-01").getMillis();
        }

        @Override
        public DateTime getTimestamp() {
          return DateTimes.of("2013-01-01");
        }

        @Override
        public List<String> getDimension(String dimension) {
          return new ArrayList<>();
        }

        @Override
        public Number getMetric(String metric) {
          return 0;
        }

        @Override
        public Object getRaw(String dimension) {
          return null;
        }

        @Override
        public int compareTo(Row o) {
          return 0;
        }
      },
      false
  );
  Assert.assertEquals(currHydrant, swapHydrant);
  Assert.assertNotSame(currHydrant, sink.getCurrHydrant());
  Assert.assertEquals(Intervals.of("2013-01-01/PT1M"), sink.getCurrHydrant().getIndex().getInterval());
  Assert.assertEquals(2, Iterators.size(sink.iterator()));
}
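The verbose anonymous InputRow in this test could also be written with MapBasedInputRow. A hedged sketch, assuming the test only cares about the row's timestamp; the empty dimension list and event map are deliberate:

```java
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.java.util.common.DateTimes;

import java.util.Collections;

// Equivalent minimal row: timestamp only, no dimensions, empty event map.
InputRow row = new MapBasedInputRow(
    DateTimes.of("2013-01-01").getMillis(),
    Collections.emptyList(),
    Collections.emptyMap()
);
sink.add(row, false);
```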
Use of org.apache.druid.segment.realtime.FireHydrant in the apache/druid project.
Class AppenderatorImpl, method persistAll.
@Override
public ListenableFuture<Object> persistAll(@Nullable final Committer committer) {
  throwPersistErrorIfExists();
  final Map<String, Integer> currentHydrants = new HashMap<>();
  final List<Pair<FireHydrant, SegmentIdWithShardSpec>> indexesToPersist = new ArrayList<>();
  int numPersistedRows = 0;
  long bytesPersisted = 0L;
  MutableLong totalHydrantsCount = new MutableLong();
  MutableLong totalHydrantsPersisted = new MutableLong();
  final long totalSinks = sinks.size();
  for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
    final SegmentIdWithShardSpec identifier = entry.getKey();
    final Sink sink = entry.getValue();
    if (sink == null) {
      throw new ISE("No sink for identifier: %s", identifier);
    }
    final List<FireHydrant> hydrants = Lists.newArrayList(sink);
    totalHydrantsCount.add(hydrants.size());
    currentHydrants.put(identifier.toString(), hydrants.size());
    numPersistedRows += sink.getNumRowsInMemory();
    bytesPersisted += sink.getBytesInMemory();
    final int limit = sink.isWritable() ? hydrants.size() - 1 : hydrants.size();
    // Gather hydrants that have not been persisted:
    for (FireHydrant hydrant : hydrants.subList(0, limit)) {
      if (!hydrant.hasSwapped()) {
        log.debug("Hydrant[%s] hasn't persisted yet, persisting. Segment[%s]", hydrant, identifier);
        indexesToPersist.add(Pair.of(hydrant, identifier));
        totalHydrantsPersisted.add(1);
      }
    }
    if (sink.swappable()) {
      // It is swappable. Get the old one to persist it and create a new one:
      indexesToPersist.add(Pair.of(sink.swap(), identifier));
      totalHydrantsPersisted.add(1);
    }
  }
  log.debug("Submitting persist runnable for dataSource[%s]", schema.getDataSource());
  final Object commitMetadata = committer == null ? null : committer.getMetadata();
  final Stopwatch runExecStopwatch = Stopwatch.createStarted();
  final Stopwatch persistStopwatch = Stopwatch.createStarted();
  AtomicLong totalPersistedRows = new AtomicLong(numPersistedRows);
  final ListenableFuture<Object> future = persistExecutor.submit(
      new Callable<Object>() {
        @Override
        public Object call() throws IOException {
          try {
            for (Pair<FireHydrant, SegmentIdWithShardSpec> pair : indexesToPersist) {
              metrics.incrementRowOutputCount(persistHydrant(pair.lhs, pair.rhs));
            }
            if (committer != null) {
              log.debug(
                  "Committing metadata[%s] for sinks[%s].",
                  commitMetadata,
                  Joiner.on(", ").join(
                      currentHydrants.entrySet()
                                     .stream()
                                     .map(entry -> StringUtils.format("%s:%d", entry.getKey(), entry.getValue()))
                                     .collect(Collectors.toList())
                  )
              );
              committer.run();
              try {
                commitLock.lock();
                final Map<String, Integer> commitHydrants = new HashMap<>();
                final Committed oldCommit = readCommit();
                if (oldCommit != null) {
                  // Merge current hydrants with existing hydrants
                  commitHydrants.putAll(oldCommit.getHydrants());
                }
                commitHydrants.putAll(currentHydrants);
                writeCommit(new Committed(commitHydrants, commitMetadata));
              } finally {
                commitLock.unlock();
              }
            }
            log.info(
                "Flushed in-memory data with commit metadata [%s] for segments: %s",
                commitMetadata,
                indexesToPersist.stream()
                                .map(itp -> itp.rhs.asSegmentId().toString())
                                .distinct()
                                .collect(Collectors.joining(", "))
            );
            log.info(
                "Persisted stats: processed rows: [%d], persisted rows[%d], sinks: [%d], "
                + "total fireHydrants (across sinks): [%d], persisted fireHydrants (across sinks): [%d]",
                rowIngestionMeters.getProcessed(),
                totalPersistedRows.get(),
                totalSinks,
                totalHydrantsCount.longValue(),
                totalHydrantsPersisted.longValue()
            );
            // Return null if committer is null
            return commitMetadata;
          } catch (IOException e) {
            metrics.incrementFailedPersists();
            throw e;
          } finally {
            metrics.incrementNumPersists();
            metrics.incrementPersistTimeMillis(persistStopwatch.elapsed(TimeUnit.MILLISECONDS));
            persistStopwatch.stop();
          }
        }
      }
  );
  final long startDelay = runExecStopwatch.elapsed(TimeUnit.MILLISECONDS);
  metrics.incrementPersistBackPressureMillis(startDelay);
  if (startDelay > WARN_DELAY) {
    log.warn("Ingestion was throttled for [%,d] millis because persists were pending.", startDelay);
  }
  runExecStopwatch.stop();
  resetNextFlush();
  // NB: The rows are still in memory until they're done persisting, but we only count rows in active indexes.
  rowsCurrentlyInMemory.addAndGet(-numPersistedRows);
  bytesCurrentlyInMemory.addAndGet(-bytesPersisted);
  log.info("Persisted rows[%,d] and (estimated) bytes[%,d]", numPersistedRows, bytesPersisted);
  return future;
}
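A hedged usage sketch of the contract above: the returned future resolves to the committer's metadata once the gathered hydrants have been persisted and the commit file written. The appenderator and committer variables are assumptions, set up elsewhere:

```java
// Hypothetical caller; "appenderator" and "committer" are assumed to exist.
ListenableFuture<Object> persistFuture = appenderator.persistAll(committer);

// Blocks until the persist callable completes; the resolved value is
// committer.getMetadata(), or null when no committer was supplied.
Object committedMetadata = persistFuture.get();
```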