Use of org.apache.druid.segment.incremental.IncrementalIndexAddResult in project druid by druid-io.
In class BatchAppenderator, the add method:
@Override
public AppenderatorAddResult add(
    final SegmentIdWithShardSpec identifier,
    final InputRow row,
    @Nullable final Supplier<Committer> committerSupplier,
    final boolean allowIncrementalPersists
) throws IndexSizeExceededException, SegmentNotWritableException
{
  throwPersistErrorIfExists();
  Preconditions.checkArgument(committerSupplier == null, "Batch appenderator does not need a committer!");
  Preconditions.checkArgument(allowIncrementalPersists, "Batch appenderator should always allow incremental persists!");
  if (!identifier.getDataSource().equals(schema.getDataSource())) {
    throw new IAE("Expected dataSource[%s] but was asked to insert row for dataSource[%s]?!", schema.getDataSource(), identifier.getDataSource());
  }
  final Sink sink = getOrCreateSink(identifier);
  metrics.reportMessageMaxTimestamp(row.getTimestampFromEpoch());
  final int sinkRowsInMemoryBeforeAdd = sink.getNumRowsInMemory();
  final int sinkRowsInMemoryAfterAdd;
  final long bytesInMemoryBeforeAdd = sink.getBytesInMemory();
  final long bytesInMemoryAfterAdd;
  final IncrementalIndexAddResult addResult;
  try {
    // allowIncrementalPersists is always true for batch
    addResult = sink.add(row, false);
    sinkRowsInMemoryAfterAdd = addResult.getRowCount();
    bytesInMemoryAfterAdd = addResult.getBytesInMemory();
  } catch (IndexSizeExceededException e) {
    // Uh oh, we can't do anything about this! We can't persist (commit metadata would be out of sync) and we
    // can't add the row (it just failed). This should never actually happen, though, because we check
    // sink.canAddRow after returning from add.
    log.error(e, "Sink for segment[%s] was unexpectedly full!", identifier);
    throw e;
  }
  if (sinkRowsInMemoryAfterAdd < 0) {
    throw new SegmentNotWritableException("Attempt to add row to swapped-out sink for segment[%s].", identifier);
  }
  if (addResult.isRowAdded()) {
    rowIngestionMeters.incrementProcessed();
  } else if (addResult.hasParseException()) {
    parseExceptionHandler.handle(addResult.getParseException());
  }
  final int numAddedRows = sinkRowsInMemoryAfterAdd - sinkRowsInMemoryBeforeAdd;
  rowsCurrentlyInMemory += numAddedRows;
  bytesCurrentlyInMemory += (bytesInMemoryAfterAdd - bytesInMemoryBeforeAdd);
  totalRows += numAddedRows;
  sinksMetadata.computeIfAbsent(identifier, unused -> new SinkMetadata()).addRows(numAddedRows);
  boolean persist = false;
  List<String> persistReasons = new ArrayList<>();
  if (!sink.canAppendRow()) {
    persist = true;
    persistReasons.add("No more rows can be appended to sink");
  }
  if (rowsCurrentlyInMemory >= tuningConfig.getMaxRowsInMemory()) {
    persist = true;
    persistReasons.add(StringUtils.format("rowsCurrentlyInMemory[%d] is greater than maxRowsInMemory[%d]", rowsCurrentlyInMemory, tuningConfig.getMaxRowsInMemory()));
  }
  if (bytesCurrentlyInMemory >= maxBytesTuningConfig) {
    persist = true;
    persistReasons.add(StringUtils.format("bytesCurrentlyInMemory[%d] is greater than maxBytesInMemory[%d]", bytesCurrentlyInMemory, maxBytesTuningConfig));
  }
  if (persist) {
    // persistAll clears rowsCurrentlyInMemory, no need to update it.
    log.info("Incremental persist to disk because %s.", String.join(",", persistReasons));
    long bytesToBePersisted = 0L;
    for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
      final Sink sinkEntry = entry.getValue();
      if (sinkEntry != null) {
        bytesToBePersisted += sinkEntry.getBytesInMemory();
        if (sinkEntry.swappable()) {
          // Code for batch no longer memory maps hydrants, but they still take memory...
          int memoryStillInUse = calculateMemoryUsedByHydrant();
          bytesCurrentlyInMemory += memoryStillInUse;
        }
      }
    }
    if (!skipBytesInMemoryOverheadCheck && bytesCurrentlyInMemory - bytesToBePersisted > maxBytesTuningConfig) {
      // We are still over maxBytesTuningConfig even after persisting.
      // This means that we ran out of all available memory to ingest (due to overheads created as part of ingestion)
      final String alertMessage = StringUtils.format(
          "Task has exceeded safe estimated heap usage limits, failing "
          + "(numSinks: [%d] numHydrantsAcrossAllSinks: [%d] totalRows: [%d])"
          + "(bytesCurrentlyInMemory: [%d] - bytesToBePersisted: [%d] > maxBytesTuningConfig: [%d])",
          sinks.size(),
          sinks.values().stream().mapToInt(Iterables::size).sum(),
          getTotalRowCount(),
          bytesCurrentlyInMemory,
          bytesToBePersisted,
          maxBytesTuningConfig
      );
      final String errorMessage = StringUtils.format(
          "%s.\nThis can occur when the overhead from too many intermediary segment persists becomes too "
          + "great to have enough space to process additional input rows. This check, along with metering the overhead "
          + "of these objects to factor into the 'maxBytesInMemory' computation, can be disabled by setting "
          + "'skipBytesInMemoryOverheadCheck' to 'true' (note that doing so might allow the task to naturally encounter "
          + "a 'java.lang.OutOfMemoryError'). Alternatively, 'maxBytesInMemory' can be increased which will cause an "
          + "increase in heap footprint, but will allow for more intermediary segment persists to occur before "
          + "reaching this condition.",
          alertMessage
      );
      log.makeAlert(alertMessage).addData("dataSource", schema.getDataSource()).emit();
      throw new RuntimeException(errorMessage);
    }
    Futures.addCallback(persistAll(null), new FutureCallback<Object>() {
      @Override
      public void onSuccess(@Nullable Object result)
      {
        // do nothing
      }

      @Override
      public void onFailure(Throwable t)
      {
        persistError = t;
      }
    });
  }
  return new AppenderatorAddResult(identifier, sinksMetadata.get(identifier).numRowsInSegment, false);
}
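As a point of reference, here is a minimal, hypothetical sketch of how a batch ingestion loop could drive this method; it is not code from the Druid project. The appenderator instance, the rows iterable, and the segmentFor(row) helper that resolves a row's target segment are assumptions, while the null committer and the true value for allowIncrementalPersists follow directly from the preconditions above.

// Hypothetical caller-side sketch; segmentFor(...) is an invented helper, not a Druid API.
static void addAllForBatch(Appenderator appenderator, Iterable<InputRow> rows) throws Exception
{
  for (InputRow row : rows) {
    final SegmentIdWithShardSpec identifier = segmentFor(row);
    // Batch appenderators take no committer and must always allow incremental persists.
    appenderator.add(identifier, row, null, true);
  }
  // Flush whatever is still in memory; batch mode also passes a null committer here.
  appenderator.persistAll(null).get();
}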
Use of org.apache.druid.segment.incremental.IncrementalIndexAddResult in project druid by druid-io.
In class StreamAppenderator, the add method:
@Override
public AppenderatorAddResult add(
    final SegmentIdWithShardSpec identifier,
    final InputRow row,
    @Nullable final Supplier<Committer> committerSupplier,
    final boolean allowIncrementalPersists
) throws IndexSizeExceededException, SegmentNotWritableException
{
  throwPersistErrorIfExists();
  if (!identifier.getDataSource().equals(schema.getDataSource())) {
    throw new IAE("Expected dataSource[%s] but was asked to insert row for dataSource[%s]?!", schema.getDataSource(), identifier.getDataSource());
  }
  final Sink sink = getOrCreateSink(identifier);
  metrics.reportMessageMaxTimestamp(row.getTimestampFromEpoch());
  final int sinkRowsInMemoryBeforeAdd = sink.getNumRowsInMemory();
  final int sinkRowsInMemoryAfterAdd;
  final long bytesInMemoryBeforeAdd = sink.getBytesInMemory();
  final long bytesInMemoryAfterAdd;
  final IncrementalIndexAddResult addResult;
  try {
    addResult = sink.add(row, !allowIncrementalPersists);
    sinkRowsInMemoryAfterAdd = addResult.getRowCount();
    bytesInMemoryAfterAdd = addResult.getBytesInMemory();
  } catch (IndexSizeExceededException e) {
    // Uh oh, we can't do anything about this! We can't persist (commit metadata would be out of sync) and we
    // can't add the row (it just failed). This should never actually happen, though, because we check
    // sink.canAddRow after returning from add.
    log.error(e, "Sink for segment[%s] was unexpectedly full!", identifier);
    throw e;
  }
  if (sinkRowsInMemoryAfterAdd < 0) {
    throw new SegmentNotWritableException("Attempt to add row to swapped-out sink for segment[%s].", identifier);
  }
  if (addResult.isRowAdded()) {
    rowIngestionMeters.incrementProcessed();
  } else if (addResult.hasParseException()) {
    parseExceptionHandler.handle(addResult.getParseException());
  }
  final int numAddedRows = sinkRowsInMemoryAfterAdd - sinkRowsInMemoryBeforeAdd;
  rowsCurrentlyInMemory.addAndGet(numAddedRows);
  bytesCurrentlyInMemory.addAndGet(bytesInMemoryAfterAdd - bytesInMemoryBeforeAdd);
  totalRows.addAndGet(numAddedRows);
  boolean isPersistRequired = false;
  boolean persist = false;
  List<String> persistReasons = new ArrayList<>();
  if (!sink.canAppendRow()) {
    persist = true;
    persistReasons.add("No more rows can be appended to sink");
  }
  if (System.currentTimeMillis() > nextFlush) {
    persist = true;
    persistReasons.add(StringUtils.format("current time[%d] is greater than nextFlush[%d]", System.currentTimeMillis(), nextFlush));
  }
  if (rowsCurrentlyInMemory.get() >= tuningConfig.getMaxRowsInMemory()) {
    persist = true;
    persistReasons.add(StringUtils.format("rowsCurrentlyInMemory[%d] is greater than maxRowsInMemory[%d]", rowsCurrentlyInMemory.get(), tuningConfig.getMaxRowsInMemory()));
  }
  if (bytesCurrentlyInMemory.get() >= maxBytesTuningConfig) {
    persist = true;
    persistReasons.add(StringUtils.format("(estimated) bytesCurrentlyInMemory[%d] is greater than maxBytesInMemory[%d]", bytesCurrentlyInMemory.get(), maxBytesTuningConfig));
  }
  if (persist) {
    if (allowIncrementalPersists) {
      // persistAll clears rowsCurrentlyInMemory, no need to update it.
      log.info("Flushing in-memory data to disk because %s.", String.join(",", persistReasons));
      long bytesToBePersisted = 0L;
      for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
        final Sink sinkEntry = entry.getValue();
        if (sinkEntry != null) {
          bytesToBePersisted += sinkEntry.getBytesInMemory();
          if (sinkEntry.swappable()) {
            // After swapping the sink, we use a memory-mapped segment instead (but only for real-time appenderators!).
            // However, the memory-mapped segment still consumes memory.
            // These memory-mapped segments are held in memory throughout the ingestion phase and permanently add to bytesCurrentlyInMemory.
            int memoryStillInUse = calculateMMappedHydrantMemoryInUsed(sink.getCurrHydrant());
            bytesCurrentlyInMemory.addAndGet(memoryStillInUse);
          }
        }
      }
      if (!skipBytesInMemoryOverheadCheck && bytesCurrentlyInMemory.get() - bytesToBePersisted > maxBytesTuningConfig) {
        // We are still over maxBytesTuningConfig even after persisting.
        // This means that we ran out of all available memory to ingest (due to overheads created as part of ingestion)
        final String alertMessage = StringUtils.format(
            "Task has exceeded safe estimated heap usage limits, failing "
            + "(numSinks: [%d] numHydrantsAcrossAllSinks: [%d] totalRows: [%d])"
            + "(bytesCurrentlyInMemory: [%d] - bytesToBePersisted: [%d] > maxBytesTuningConfig: [%d])",
            sinks.size(),
            sinks.values().stream().mapToInt(Iterables::size).sum(),
            getTotalRowCount(),
            bytesCurrentlyInMemory.get(),
            bytesToBePersisted,
            maxBytesTuningConfig
        );
        final String errorMessage = StringUtils.format(
            "%s.\nThis can occur when the overhead from too many intermediary segment persists becomes too "
            + "great to have enough space to process additional input rows. This check, along with metering the overhead "
            + "of these objects to factor into the 'maxBytesInMemory' computation, can be disabled by setting "
            + "'skipBytesInMemoryOverheadCheck' to 'true' (note that doing so might allow the task to naturally encounter "
            + "a 'java.lang.OutOfMemoryError'). Alternatively, 'maxBytesInMemory' can be increased which will cause an "
            + "increase in heap footprint, but will allow for more intermediary segment persists to occur before "
            + "reaching this condition.",
            alertMessage
        );
        log.makeAlert(alertMessage).addData("dataSource", schema.getDataSource()).emit();
        throw new RuntimeException(errorMessage);
      }
      Futures.addCallback(persistAll(committerSupplier == null ? null : committerSupplier.get()), new FutureCallback<Object>() {
        @Override
        public void onSuccess(@Nullable Object result)
        {
          // do nothing
        }

        @Override
        public void onFailure(Throwable t)
        {
          persistError = t;
        }
      });
    } else {
      isPersistRequired = true;
    }
  }
  return new AppenderatorAddResult(identifier, sink.getNumRows(), isPersistRequired);
}
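Unlike the batch path, the streaming method honours allowIncrementalPersists: when the flag is false, the method only raises isPersistRequired and leaves the actual persist to the caller. Below is a hedged sketch of that caller-side handling, assuming AppenderatorAddResult exposes an isPersistRequired() accessor matching the constructor argument used above; it is an illustration, not code from Druid.

// Hedged sketch: the caller disables incremental persists and reacts to the returned flag itself.
final AppenderatorAddResult result = appenderator.add(identifier, row, committerSupplier, false);
if (result.isPersistRequired()) {
  // The appenderator only signalled that a persist is due; the caller chooses when to run it.
  appenderator.persistAll(committerSupplier == null ? null : committerSupplier.get());
}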
Use of org.apache.druid.segment.incremental.IncrementalIndexAddResult in project druid by druid-io.
In class RealtimePlumber, the add method:
@Override
public IncrementalIndexAddResult add(InputRow row, Supplier<Committer> committerSupplier) throws IndexSizeExceededException
{
  long messageTimestamp = row.getTimestampFromEpoch();
  final Sink sink = getSink(messageTimestamp);
  metrics.reportMessageMaxTimestamp(messageTimestamp);
  if (sink == null) {
    return Plumber.THROWAWAY;
  }
  final IncrementalIndexAddResult addResult = sink.add(row, false);
  if (config.isReportParseExceptions() && addResult.getParseException() != null) {
    throw addResult.getParseException();
  }
  if (!sink.canAppendRow() || System.currentTimeMillis() > nextFlush) {
    persist(committerSupplier.get());
  }
  return addResult;
}
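Note that committerSupplier.get() is only consulted when a persist is actually triggered. As an illustration only (not code from RealtimePlumber), a no-op committer supplier could look like the sketch below, assuming the usual Druid Committer shape of Runnable plus a getMetadata() accessor.

// Illustrative no-op committer supplier: no commit metadata and nothing to run after the persist.
final Supplier<Committer> committerSupplier = () -> new Committer()
{
  @Override
  public Object getMetadata()
  {
    return null; // no offsets or checkpoints to record in this sketch
  }

  @Override
  public void run()
  {
    // nothing to do once the persist has completed
  }
};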
Use of org.apache.druid.segment.incremental.IncrementalIndexAddResult in project druid by druid-io.
In class AppenderatorImpl, the add method:
@Override
public AppenderatorAddResult add(
    final SegmentIdWithShardSpec identifier,
    final InputRow row,
    @Nullable final Supplier<Committer> committerSupplier,
    final boolean allowIncrementalPersists
) throws IndexSizeExceededException, SegmentNotWritableException
{
  throwPersistErrorIfExists();
  if (!identifier.getDataSource().equals(schema.getDataSource())) {
    throw new IAE("Expected dataSource[%s] but was asked to insert row for dataSource[%s]?!", schema.getDataSource(), identifier.getDataSource());
  }
  final Sink sink = getOrCreateSink(identifier);
  metrics.reportMessageMaxTimestamp(row.getTimestampFromEpoch());
  final int sinkRowsInMemoryBeforeAdd = sink.getNumRowsInMemory();
  final int sinkRowsInMemoryAfterAdd;
  final long bytesInMemoryBeforeAdd = sink.getBytesInMemory();
  final long bytesInMemoryAfterAdd;
  final IncrementalIndexAddResult addResult;
  try {
    addResult = sink.add(row, !allowIncrementalPersists);
    sinkRowsInMemoryAfterAdd = addResult.getRowCount();
    bytesInMemoryAfterAdd = addResult.getBytesInMemory();
  } catch (IndexSizeExceededException e) {
    // Uh oh, we can't do anything about this! We can't persist (commit metadata would be out of sync) and we
    // can't add the row (it just failed). This should never actually happen, though, because we check
    // sink.canAddRow after returning from add.
    log.error(e, "Sink for segment[%s] was unexpectedly full!", identifier);
    throw e;
  }
  if (sinkRowsInMemoryAfterAdd < 0) {
    throw new SegmentNotWritableException("Attempt to add row to swapped-out sink for segment[%s].", identifier);
  }
  if (addResult.isRowAdded()) {
    rowIngestionMeters.incrementProcessed();
  } else if (addResult.hasParseException()) {
    parseExceptionHandler.handle(addResult.getParseException());
  }
  final int numAddedRows = sinkRowsInMemoryAfterAdd - sinkRowsInMemoryBeforeAdd;
  rowsCurrentlyInMemory.addAndGet(numAddedRows);
  bytesCurrentlyInMemory.addAndGet(bytesInMemoryAfterAdd - bytesInMemoryBeforeAdd);
  totalRows.addAndGet(numAddedRows);
  boolean isPersistRequired = false;
  boolean persist = false;
  List<String> persistReasons = new ArrayList<>();
  if (!sink.canAppendRow()) {
    persist = true;
    persistReasons.add("No more rows can be appended to sink");
  }
  if (System.currentTimeMillis() > nextFlush) {
    persist = true;
    persistReasons.add(StringUtils.format("current time[%d] is greater than nextFlush[%d]", System.currentTimeMillis(), nextFlush));
  }
  if (rowsCurrentlyInMemory.get() >= tuningConfig.getMaxRowsInMemory()) {
    persist = true;
    persistReasons.add(StringUtils.format("rowsCurrentlyInMemory[%d] is greater than maxRowsInMemory[%d]", rowsCurrentlyInMemory.get(), tuningConfig.getMaxRowsInMemory()));
  }
  if (bytesCurrentlyInMemory.get() >= maxBytesTuningConfig) {
    persist = true;
    persistReasons.add(StringUtils.format("(estimated) bytesCurrentlyInMemory[%d] is greater than maxBytesInMemory[%d]", bytesCurrentlyInMemory.get(), maxBytesTuningConfig));
  }
  if (persist) {
    if (allowIncrementalPersists) {
      // persistAll clears rowsCurrentlyInMemory, no need to update it.
      log.info("Flushing in-memory data to disk because %s.", String.join(",", persistReasons));
      long bytesToBePersisted = 0L;
      for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : sinks.entrySet()) {
        final Sink sinkEntry = entry.getValue();
        if (sinkEntry != null) {
          bytesToBePersisted += sinkEntry.getBytesInMemory();
          if (sinkEntry.swappable()) {
            // After swapping the sink, we use a memory-mapped segment instead (but only for real-time appenderators!).
            // However, the memory-mapped segment still consumes memory.
            // These memory-mapped segments are held in memory throughout the ingestion phase and permanently add to bytesCurrentlyInMemory.
            int memoryStillInUse = calculateMMappedHydrantMemoryInUsed(sink.getCurrHydrant());
            bytesCurrentlyInMemory.addAndGet(memoryStillInUse);
          }
        }
      }
      if (!skipBytesInMemoryOverheadCheck && bytesCurrentlyInMemory.get() - bytesToBePersisted > maxBytesTuningConfig) {
        // We are still over maxBytesTuningConfig even after persisting.
        // This means that we ran out of all available memory to ingest (due to overheads created as part of ingestion)
        final String alertMessage = StringUtils.format(
            "Task has exceeded safe estimated heap usage limits, failing "
            + "(numSinks: [%d] numHydrantsAcrossAllSinks: [%d] totalRows: [%d])"
            + "(bytesCurrentlyInMemory: [%d] - bytesToBePersisted: [%d] > maxBytesTuningConfig: [%d])",
            sinks.size(),
            sinks.values().stream().mapToInt(Iterables::size).sum(),
            getTotalRowCount(),
            bytesCurrentlyInMemory.get(),
            bytesToBePersisted,
            maxBytesTuningConfig
        );
        final String errorMessage = StringUtils.format(
            "%s.\nThis can occur when the overhead from too many intermediary segment persists becomes too "
            + "great to have enough space to process additional input rows. This check, along with metering the overhead "
            + "of these objects to factor into the 'maxBytesInMemory' computation, can be disabled by setting "
            + "'skipBytesInMemoryOverheadCheck' to 'true' (note that doing so might allow the task to naturally encounter "
            + "a 'java.lang.OutOfMemoryError'). Alternatively, 'maxBytesInMemory' can be increased which will cause an "
            + "increase in heap footprint, but will allow for more intermediary segment persists to occur before "
            + "reaching this condition.",
            alertMessage
        );
        log.makeAlert(alertMessage).addData("dataSource", schema.getDataSource()).emit();
        throw new RuntimeException(errorMessage);
      }
      Futures.addCallback(persistAll(committerSupplier == null ? null : committerSupplier.get()), new FutureCallback<Object>() {
        @Override
        public void onSuccess(@Nullable Object result)
        {
          // do nothing
        }

        @Override
        public void onFailure(Throwable t)
        {
          persistError = t;
        }
      });
    } else {
      isPersistRequired = true;
    }
  }
  return new AppenderatorAddResult(identifier, sink.getNumRows(), isPersistRequired);
}
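The heap-safety guard in the middle of this method boils down to one comparison: even if every sink persisted everything it currently holds, would the estimated in-memory footprint still exceed the limit? The helper below is only a restatement of that arithmetic for clarity; the method name and its standalone form are invented, not part of AppenderatorImpl.

// Hedged restatement of the overhead check performed above; purely illustrative.
static boolean wouldStillExceedHeapLimit(
    boolean skipBytesInMemoryOverheadCheck,
    long bytesCurrentlyInMemory,  // running estimate, including the mmapped-hydrant overhead added above
    long bytesToBePersisted,      // sum of getBytesInMemory() across all sinks
    long maxBytesTuningConfig     // effective maxBytesInMemory limit
)
{
  return !skipBytesInMemoryOverheadCheck
         && bytesCurrentlyInMemory - bytesToBePersisted > maxBytesTuningConfig;
}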
Use of org.apache.druid.segment.incremental.IncrementalIndexAddResult in project druid by druid-io.
In class Plumbers, the addNextRow method:
public static void addNextRow(
    final Supplier<Committer> committerSupplier,
    final Firehose firehose,
    final Plumber plumber,
    final boolean reportParseExceptions,
    final FireDepartmentMetrics metrics
) throws IOException
{
  final InputRow inputRow;
  try {
    inputRow = firehose.nextRow();
  } catch (ParseException e) {
    if (reportParseExceptions) {
      throw e;
    } else {
      log.debug(e, "Discarded row due to exception, considering unparseable.");
      metrics.incrementUnparseable();
      return;
    }
  }
  if (inputRow == null) {
    log.debug("Discarded null row, considering thrownAway.");
    metrics.incrementThrownAway();
    return;
  }
  final IncrementalIndexAddResult addResult;
  try {
    addResult = plumber.add(inputRow, committerSupplier);
  } catch (IndexSizeExceededException e) {
    // plumber.add should be swapping out indexes before they fill up.
    throw new ISE(e, "Index size exceeded");
  }
  if (addResult.getRowCount() == -1) {
    metrics.incrementThrownAway();
    log.debug("Discarded row[%s], considering thrownAway due to %s.", inputRow, addResult.getReasonOfNotAdded());
    return;
  }
  if (addResult.getRowCount() == -2) {
    metrics.incrementDedup();
    log.debug("Discarded row[%s], considering duplication.", inputRow);
    return;
  }
  metrics.incrementProcessed();
}
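The numeric checks above rely on sentinel row counts carried by IncrementalIndexAddResult: -1 marks a row that was thrown away (for example the Plumber.THROWAWAY result returned when no sink covers the row's timestamp) and -2 a deduplicated row, while parse failures travel in getParseException(). The helper below merely names those cases; the enum and method are illustrative and not part of Druid.

// Illustrative classification mirroring the checks in addNextRow; not a Druid API.
enum AddOutcome { PROCESSED, THROWN_AWAY, DEDUPLICATED, UNPARSEABLE }

static AddOutcome classify(IncrementalIndexAddResult addResult)
{
  if (addResult.getParseException() != null) {
    return AddOutcome.UNPARSEABLE;
  }
  if (addResult.getRowCount() == -1) {
    return AddOutcome.THROWN_AWAY;
  }
  if (addResult.getRowCount() == -2) {
    return AddOutcome.DEDUPLICATED;
  }
  return AddOutcome.PROCESSED;
}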