use of io.druid.segment.realtime.plumber.Sink in project druid by druid-io.
the class AppenderatorImpl method add.
@Override
public int add(final SegmentIdentifier identifier, final InputRow row, final Supplier<Committer> committerSupplier) throws IndexSizeExceededException, SegmentNotWritableException {
if (!identifier.getDataSource().equals(schema.getDataSource())) {
throw new IAE("Expected dataSource[%s] but was asked to insert row for dataSource[%s]?!", schema.getDataSource(), identifier.getDataSource());
}
final Sink sink = getOrCreateSink(identifier);
metrics.reportMessageMaxTimestamp(row.getTimestampFromEpoch());
final int sinkRowsInMemoryBeforeAdd = sink.getNumRowsInMemory();
final int sinkRowsInMemoryAfterAdd;
try {
sinkRowsInMemoryAfterAdd = sink.add(row);
} catch (IndexSizeExceededException e) {
// Uh oh, we can't do anything about this! We can't persist (commit metadata would be out of sync) and we
// can't add the row (it just failed). This should never actually happen, though, because we check
// sink.canAddRow after returning from add.
log.error(e, "Sink for segment[%s] was unexpectedly full!", identifier);
throw e;
}
if (sinkRowsInMemoryAfterAdd < 0) {
throw new SegmentNotWritableException("Attempt to add row to swapped-out sink for segment[%s].", identifier);
}
rowsCurrentlyInMemory.addAndGet(sinkRowsInMemoryAfterAdd - sinkRowsInMemoryBeforeAdd);
if (!sink.canAppendRow() || System.currentTimeMillis() > nextFlush || rowsCurrentlyInMemory.get() >= tuningConfig.getMaxRowsInMemory()) {
// persistAll clears rowsCurrentlyInMemory, no need to update it.
persistAll(committerSupplier.get());
}
return sink.getNumRows();
}
use of io.druid.segment.realtime.plumber.Sink in project druid by druid-io.
the class AppenderatorImpl method getOrCreateSink.
private Sink getOrCreateSink(final SegmentIdentifier identifier) {
Sink retVal = sinks.get(identifier);
if (retVal == null) {
retVal = new Sink(identifier.getInterval(), schema, identifier.getShardSpec(), identifier.getVersion(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions());
try {
segmentAnnouncer.announceSegment(retVal.getSegment());
} catch (IOException e) {
log.makeAlert(e, "Failed to announce new segment[%s]", schema.getDataSource()).addData("interval", retVal.getInterval()).emit();
}
sinks.put(identifier, retVal);
metrics.setSinkCount(sinks.size());
sinkTimeline.add(retVal.getInterval(), retVal.getVersion(), identifier.getShardSpec().createChunk(retVal));
}
return retVal;
}
use of io.druid.segment.realtime.plumber.Sink in project druid by druid-io.
the class AppenderatorImpl method bootstrapSinksFromDisk.
/**
* Populate "sinks" and "sinkTimeline" with committed segments, and announce them with the segmentAnnouncer.
*
* @return persisted commit metadata
*/
private Object bootstrapSinksFromDisk() {
Preconditions.checkState(sinks.isEmpty(), "Already bootstrapped?!");
final File baseDir = tuningConfig.getBasePersistDirectory();
if (!baseDir.exists()) {
return null;
}
final File[] files = baseDir.listFiles();
if (files == null) {
return null;
}
final File commitFile = computeCommitFile();
final Committed committed;
try {
if (commitFile.exists()) {
committed = objectMapper.readValue(commitFile, Committed.class);
} else {
committed = Committed.nil();
}
} catch (Exception e) {
throw new ISE(e, "Failed to read commitFile: %s", commitFile);
}
log.info("Loading sinks from[%s]: %s", baseDir, committed.getHydrants().keySet());
for (File sinkDir : files) {
final File identifierFile = new File(sinkDir, IDENTIFIER_FILE_NAME);
if (!identifierFile.isFile()) {
// No identifier in this sinkDir; it must not actually be a sink directory. Skip it.
continue;
}
try {
final SegmentIdentifier identifier = objectMapper.readValue(new File(sinkDir, "identifier.json"), SegmentIdentifier.class);
final int committedHydrants = committed.getCommittedHydrants(identifier.getIdentifierAsString());
if (committedHydrants <= 0) {
log.info("Removing uncommitted sink at [%s]", sinkDir);
FileUtils.deleteDirectory(sinkDir);
continue;
}
// To avoid reading and listing of "merged" dir and other special files
final File[] sinkFiles = sinkDir.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String fileName) {
return !(Ints.tryParse(fileName) == null);
}
});
Arrays.sort(sinkFiles, new Comparator<File>() {
@Override
public int compare(File o1, File o2) {
return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName()));
}
});
List<FireHydrant> hydrants = Lists.newArrayList();
for (File hydrantDir : sinkFiles) {
final int hydrantNumber = Integer.parseInt(hydrantDir.getName());
if (hydrantNumber >= committedHydrants) {
log.info("Removing uncommitted segment at [%s]", hydrantDir);
FileUtils.deleteDirectory(hydrantDir);
} else {
log.info("Loading previously persisted segment at [%s]", hydrantDir);
if (hydrantNumber != hydrants.size()) {
throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
}
hydrants.add(new FireHydrant(new QueryableIndexSegment(identifier.getIdentifierAsString(), indexIO.loadIndex(hydrantDir)), hydrantNumber));
}
}
// Make sure we loaded enough hydrants.
if (committedHydrants != hydrants.size()) {
throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
}
Sink currSink = new Sink(identifier.getInterval(), schema, identifier.getShardSpec(), identifier.getVersion(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions(), hydrants);
sinks.put(identifier, currSink);
sinkTimeline.add(currSink.getInterval(), currSink.getVersion(), identifier.getShardSpec().createChunk(currSink));
segmentAnnouncer.announceSegment(currSink.getSegment());
} catch (IOException e) {
log.makeAlert(e, "Problem loading sink[%s] from disk.", schema.getDataSource()).addData("sinkDir", sinkDir).emit();
}
}
// Make sure we loaded all committed sinks.
final Set<String> loadedSinks = Sets.newHashSet(Iterables.transform(sinks.keySet(), new Function<SegmentIdentifier, String>() {
@Override
public String apply(SegmentIdentifier input) {
return input.getIdentifierAsString();
}
}));
final Set<String> missingSinks = Sets.difference(committed.getHydrants().keySet(), loadedSinks);
if (!missingSinks.isEmpty()) {
throw new ISE("Missing committed sinks [%s]", Joiner.on(", ").join(missingSinks));
}
return committed.getMetadata();
}
use of io.druid.segment.realtime.plumber.Sink in project druid by druid-io.
the class AppenderatorImpl method persistAll.
@Override
public ListenableFuture<Object> persistAll(final Committer committer) {
// Submit persistAll task to the persistExecutor
final Map<SegmentIdentifier, Integer> commitHydrants = Maps.newHashMap();
final List<Pair<FireHydrant, SegmentIdentifier>> indexesToPersist = Lists.newArrayList();
final Set<SegmentIdentifier> identifiers = sinks.keySet();
for (SegmentIdentifier identifier : identifiers) {
final Sink sink = sinks.get(identifier);
final List<FireHydrant> hydrants = Lists.newArrayList(sink);
commitHydrants.put(identifier, hydrants.size());
final int limit = sink.isWritable() ? hydrants.size() - 1 : hydrants.size();
for (FireHydrant hydrant : hydrants.subList(0, limit)) {
if (!hydrant.hasSwapped()) {
log.info("Hydrant[%s] hasn't persisted yet, persisting. Segment[%s]", hydrant, identifier);
indexesToPersist.add(Pair.of(hydrant, identifier));
}
}
if (sink.swappable()) {
indexesToPersist.add(Pair.of(sink.swap(), identifier));
}
}
log.info("Submitting persist runnable for dataSource[%s]", schema.getDataSource());
final String threadName = String.format("%s-incremental-persist", schema.getDataSource());
final Object commitMetadata = committer.getMetadata();
final Stopwatch runExecStopwatch = Stopwatch.createStarted();
final Stopwatch persistStopwatch = Stopwatch.createStarted();
final ListenableFuture<Object> future = persistExecutor.submit(new ThreadRenamingCallable<Object>(threadName) {
@Override
public Object doCall() {
try {
for (Pair<FireHydrant, SegmentIdentifier> pair : indexesToPersist) {
metrics.incrementRowOutputCount(persistHydrant(pair.lhs, pair.rhs));
}
log.info("Committing metadata[%s] for sinks[%s].", commitMetadata, Joiner.on(", ").join(Iterables.transform(commitHydrants.entrySet(), new Function<Map.Entry<SegmentIdentifier, Integer>, String>() {
@Override
public String apply(Map.Entry<SegmentIdentifier, Integer> entry) {
return String.format("%s:%d", entry.getKey().getIdentifierAsString(), entry.getValue());
}
})));
committer.run();
objectMapper.writeValue(computeCommitFile(), Committed.create(commitHydrants, commitMetadata));
return commitMetadata;
} catch (Exception e) {
metrics.incrementFailedPersists();
throw Throwables.propagate(e);
} finally {
metrics.incrementNumPersists();
metrics.incrementPersistTimeMillis(persistStopwatch.elapsed(TimeUnit.MILLISECONDS));
persistStopwatch.stop();
}
}
});
final long startDelay = runExecStopwatch.elapsed(TimeUnit.MILLISECONDS);
metrics.incrementPersistBackPressureMillis(startDelay);
if (startDelay > WARN_DELAY) {
log.warn("Ingestion was throttled for [%,d] millis because persists were pending.", startDelay);
}
runExecStopwatch.stop();
resetNextFlush();
// NB: The rows are still in memory until they're done persisting, but we only count rows in active indexes.
rowsCurrentlyInMemory.set(0);
return future;
}
use of io.druid.segment.realtime.plumber.Sink in project druid by druid-io.
the class RealtimeManagerTest method setUp.
@Before
public void setUp() throws Exception {
final List<TestInputRowHolder> rows = Arrays.asList(makeRow(new DateTime("9000-01-01").getMillis()), makeRow(new ParseException("parse error")), null, makeRow(new DateTime().getMillis()));
ObjectMapper jsonMapper = new DefaultObjectMapper();
schema = new DataSchema("test", null, new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), jsonMapper);
schema2 = new DataSchema("testV2", null, new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), jsonMapper);
RealtimeIOConfig ioConfig = new RealtimeIOConfig(new FirehoseFactory() {
@Override
public Firehose connect(InputRowParser parser) throws IOException {
return new TestFirehose(rows.iterator());
}
}, new PlumberSchool() {
@Override
public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
return plumber;
}
}, null);
RealtimeIOConfig ioConfig2 = new RealtimeIOConfig(null, new PlumberSchool() {
@Override
public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
return plumber2;
}
}, new FirehoseFactoryV2() {
@Override
public FirehoseV2 connect(InputRowParser parser, Object arg1) throws IOException, ParseException {
return new TestFirehoseV2(rows.iterator());
}
});
RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(1, new Period("P1Y"), null, null, null, null, null, null, null, null, 0, 0, null, null);
plumber = new TestPlumber(new Sink(new Interval("0/P5000Y"), schema, tuningConfig.getShardSpec(), new DateTime().toString(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions()));
realtimeManager = new RealtimeManager(Arrays.<FireDepartment>asList(new FireDepartment(schema, ioConfig, tuningConfig)), null);
plumber2 = new TestPlumber(new Sink(new Interval("0/P5000Y"), schema2, tuningConfig.getShardSpec(), new DateTime().toString(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions()));
realtimeManager2 = new RealtimeManager(Arrays.<FireDepartment>asList(new FireDepartment(schema2, ioConfig2, tuningConfig)), null);
tuningConfig_0 = new RealtimeTuningConfig(1, new Period("P1Y"), null, null, null, null, null, new LinearShardSpec(0), null, null, 0, 0, null, null);
tuningConfig_1 = new RealtimeTuningConfig(1, new Period("P1Y"), null, null, null, null, null, new LinearShardSpec(1), null, null, 0, 0, null, null);
schema3 = new DataSchema("testing", null, new AggregatorFactory[] { new CountAggregatorFactory("ignore") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), jsonMapper);
FireDepartment department_0 = new FireDepartment(schema3, ioConfig, tuningConfig_0);
FireDepartment department_1 = new FireDepartment(schema3, ioConfig2, tuningConfig_1);
QueryRunnerFactoryConglomerate conglomerate = new QueryRunnerFactoryConglomerate() {
@Override
public <T, QueryType extends Query<T>> QueryRunnerFactory<T, QueryType> findFactory(QueryType query) {
return factory;
}
};
chiefStartedLatch = new CountDownLatch(2);
RealtimeManager.FireChief fireChief_0 = new RealtimeManager.FireChief(department_0, conglomerate) {
@Override
public void run() {
super.initPlumber();
chiefStartedLatch.countDown();
}
};
RealtimeManager.FireChief fireChief_1 = new RealtimeManager.FireChief(department_1, conglomerate) {
@Override
public void run() {
super.initPlumber();
chiefStartedLatch.countDown();
}
};
realtimeManager3 = new RealtimeManager(Arrays.asList(department_0, department_1), conglomerate, ImmutableMap.<String, Map<Integer, RealtimeManager.FireChief>>of("testing", ImmutableMap.of(0, fireChief_0, 1, fireChief_1)));
startFireChiefWithPartitionNum(fireChief_0, 0);
startFireChiefWithPartitionNum(fireChief_1, 1);
}
Aggregations