use of org.apache.nifi.util.timebuffer.TimedCountSize in project nifi by apache.
the class PersistentProvenanceRepository method rollover.
/**
* <p>
* MUST be called with the write lock held.
* </p>
*
* Rolls over the data in the journal files, merging them into a single
* Provenance Event Log File, and compressing and indexing as needed.
*
* @param force if true, will force a rollover regardless of whether or not
* data has been written
* @throws IOException if unable to complete rollover
*/
private void rollover(final boolean force) throws IOException {
if (!configuration.isAllowRollover()) {
return;
}
// have written something to the stream, then roll over
if (force || recordsWrittenSinceRollover.get() > 0L || dirtyWriterCount.get() > 0) {
final List<File> journalsToMerge = new ArrayList<>();
for (final RecordWriter writer : writers) {
if (!writer.isClosed()) {
final File writerFile = writer.getFile();
journalsToMerge.add(writerFile);
try {
writer.close();
} catch (final IOException ioe) {
logger.warn("Failed to close {} due to {}", writer, ioe.toString());
if (logger.isDebugEnabled()) {
logger.warn("", ioe);
}
}
}
}
if (logger.isDebugEnabled()) {
if (journalsToMerge.isEmpty()) {
logger.debug("No journals to merge; all RecordWriters were already closed");
} else {
logger.debug("Going to merge {} files for journals starting with ID {}", journalsToMerge.size(), LuceneUtil.substringBefore(journalsToMerge.get(0).getName(), "."));
}
}
// Choose a storage directory to store the merged file in.
final long storageDirIdx = storageDirectoryIndex.getAndIncrement();
final List<File> storageDirs = new ArrayList<>(configuration.getStorageDirectories().values());
final File storageDir = storageDirs.get((int) (storageDirIdx % storageDirs.size()));
Future<?> future = null;
if (!journalsToMerge.isEmpty()) {
// Run the rollover logic in a background thread.
final AtomicReference<Future<?>> futureReference = new AtomicReference<>();
final AtomicInteger retryAttempts = new AtomicInteger(MAX_JOURNAL_ROLLOVER_RETRIES);
final int recordsWritten = recordsWrittenSinceRollover.getAndSet(0);
final Runnable rolloverRunnable = new Runnable() {
@Override
public void run() {
File fileRolledOver = null;
try {
try {
fileRolledOver = mergeJournals(journalsToMerge, getMergeFile(journalsToMerge, storageDir), eventReporter);
} catch (final IOException ioe) {
logger.error("Failed to merge Journal Files {} into a Provenance Log File due to {}", journalsToMerge, ioe.toString());
logger.error("", ioe);
}
if (fileRolledOver != null) {
final File file = fileRolledOver;
// update our map of id to Path
// We need to make sure that another thread doesn't also update the map at the same time. We cannot
// use the write lock when purging old events, and we want to use the same approach here.
boolean updated = false;
final Long fileFirstEventId = Long.valueOf(LuceneUtil.substringBefore(fileRolledOver.getName(), "."));
while (!updated) {
final SortedMap<Long, Path> existingPathMap = idToPathMap.get();
final SortedMap<Long, Path> newIdToPathMap = new TreeMap<>(new PathMapComparator());
newIdToPathMap.putAll(existingPathMap);
newIdToPathMap.put(fileFirstEventId, file.toPath());
updated = idToPathMap.compareAndSet(existingPathMap, newIdToPathMap);
}
final TimedCountSize countSize = updateCounts.getAggregateValue(System.currentTimeMillis() - TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES));
logger.info("Successfully Rolled over Provenance Event file containing {} records. In the past 5 minutes, " + "{} events have been written to the Provenance Repository, totaling {}", recordsWritten, countSize.getCount(), FormatUtils.formatDataSize(countSize.getSize()));
}
// if files were rolled over or if out of retries stop the future
if (fileRolledOver != null || retryAttempts.decrementAndGet() == 0) {
if (fileRolledOver == null && retryAttempts.get() == 0) {
logger.error("Failed to merge Journal Files {} after {} attempts.", journalsToMerge, MAX_JOURNAL_ROLLOVER_RETRIES);
}
rolloverCompletions.getAndIncrement();
// Cancel the future so that we don't run anymore
Future<?> future;
while ((future = futureReference.get()) == null) {
try {
Thread.sleep(10L);
} catch (final InterruptedException ie) {
}
}
future.cancel(false);
} else {
logger.warn("Couldn't merge journals. Will try again. journalsToMerge: {}, storageDir: {}", journalsToMerge, storageDir);
}
} catch (final Exception e) {
logger.error("Failed to merge journals. Will try again. journalsToMerge: {}, storageDir: {}, cause: {}", journalsToMerge, storageDir, e.toString());
logger.error("", e);
}
}
};
// We are going to schedule the future to run immediately and then repeat every 10 seconds. This allows us to keep retrying if we
// fail for some reason. When we succeed or if retries are exceeded, the Runnable will cancel itself.
future = rolloverExecutor.scheduleWithFixedDelay(rolloverRunnable, 0, getRolloverRetryMillis(), TimeUnit.MILLISECONDS);
futureReference.set(future);
}
streamStartTime.set(System.currentTimeMillis());
bytesWrittenSinceRollover.set(0);
// We don't want to create new 'writers' until the number of unmerged journals falls below our threshold. So we wait
// here before we repopulate the 'writers' member variable and release the lock.
int journalFileCount = getJournalCount();
long repoSize = getSize(getLogFiles(), 0L);
final int journalCountThreshold = configuration.getJournalCount() * 5;
final long sizeThreshold = (long) (configuration.getMaxStorageCapacity() * ROLLOVER_HIGH_WATER);
// that is no longer the case.
if (journalFileCount > journalCountThreshold || repoSize > sizeThreshold) {
final long stopTheWorldStart = System.nanoTime();
logger.warn("The rate of the dataflow is exceeding the provenance recording rate. " + "Slowing down flow to accommodate. Currently, there are {} journal files ({} bytes) and " + "threshold for blocking is {} ({} bytes)", journalFileCount, repoSize, journalCountThreshold, sizeThreshold);
eventReporter.reportEvent(Severity.WARNING, "Provenance Repository", "The rate of the dataflow is " + "exceeding the provenance recording rate. Slowing down flow to accommodate");
while (journalFileCount > journalCountThreshold || repoSize > sizeThreshold) {
// if a shutdown happens while we are in this loop, kill the rollover thread and break
if (this.closed.get()) {
if (future != null) {
future.cancel(true);
}
break;
}
if (repoSize > sizeThreshold) {
logger.debug("Provenance Repository has exceeded its size threshold; will trigger purging of oldest events");
purgeOldEvents();
journalFileCount = getJournalCount();
repoSize = getSize(getLogFiles(), 0L);
continue;
} else {
// due to the runnable that we scheduled above
try {
Thread.sleep(100L);
} catch (final InterruptedException ie) {
}
}
logger.debug("Provenance Repository is still behind. Keeping flow slowed down " + "to accommodate. Currently, there are {} journal files ({} bytes) and " + "threshold for blocking is {} ({} bytes)", journalFileCount, repoSize, journalCountThreshold, sizeThreshold);
journalFileCount = getJournalCount();
repoSize = getSize(getLogFiles(), 0L);
}
final long stopTheWorldNanos = System.nanoTime() - stopTheWorldStart;
backpressurePauseMillis.add(new TimestampedLong(stopTheWorldNanos));
final TimestampedLong pauseNanosLastFiveMinutes = backpressurePauseMillis.getAggregateValue(System.currentTimeMillis() - TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES));
logger.info("Provenance Repository has now caught up with rolling over journal files. Current number of " + "journal files to be rolled over is {}. Provenance Repository Back Pressure paused Session commits for {} ({} total in the last 5 minutes).", journalFileCount, FormatUtils.formatNanos(stopTheWorldNanos, true), FormatUtils.formatNanos(pauseNanosLastFiveMinutes.getValue(), true));
}
// we've finished rolling over successfully. Create new writers and reset state.
writers = createWriters(configuration, idGenerator.get());
dirtyWriterCount.set(0);
streamStartTime.set(System.currentTimeMillis());
recordsWrittenSinceRollover.getAndSet(0);
}
}
use of org.apache.nifi.util.timebuffer.TimedCountSize in project nifi by apache.
the class PersistentProvenanceRepository method persistRecord.
private void persistRecord(final Iterable<ProvenanceEventRecord> records) {
final long totalJournalSize;
readLock.lock();
try {
long bytesWritten = 0L;
// obtain a lock on one of the RecordWriter's so that no other thread is able to write to this writer until we're finished.
// Although the writer itself is thread-safe, we need to generate an event id and then write the event
// atomically, so we need to do this with a lock.
boolean locked = false;
RecordWriter writer;
do {
final RecordWriter[] recordWriters = this.writers;
final int numDirty = dirtyWriterCount.get();
if (numDirty >= recordWriters.length) {
throw new IllegalStateException("Cannot update repository because all partitions are unusable at this time. Writing to the repository would cause corruption. " + "This most often happens as a result of the repository running out of disk space or the JVM running out of memory.");
}
final long idx = writerIndex.getAndIncrement();
writer = recordWriters[(int) (idx % recordWriters.length)];
locked = writer.tryLock();
} while (!locked);
try {
try {
long recordsWritten = 0L;
for (final ProvenanceEventRecord nextRecord : records) {
final StorageSummary persistedEvent = writer.writeRecord(nextRecord);
bytesWritten += persistedEvent.getSerializedLength();
recordsWritten++;
logger.trace("Wrote record with ID {} to {}", persistedEvent.getEventId(), writer);
}
writer.flush();
if (alwaysSync) {
writer.sync();
}
totalJournalSize = bytesWrittenSinceRollover.addAndGet(bytesWritten);
recordsWrittenSinceRollover.getAndIncrement();
this.updateCounts.add(new TimedCountSize(recordsWritten, bytesWritten));
} catch (final Throwable t) {
// We need to set the repoDirty flag before we release the lock for this journal.
// Otherwise, another thread may write to this journal -- this is a problem because
// the journal contains part of our record but not all of it. Writing to the end of this
// journal will result in corruption!
writer.markDirty();
dirtyWriterCount.incrementAndGet();
// force rollover to happen soon.
streamStartTime.set(0L);
throw t;
} finally {
writer.unlock();
}
} catch (final IOException ioe) {
// warn about the failure
logger.error("Failed to persist Provenance Event due to {}.", ioe.toString());
logger.error("", ioe);
eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to persist Provenance Event due to " + ioe.toString());
// Attempt to perform a rollover. An IOException in this part of the code generally is the result of
// running out of disk space. If we have multiple partitions, we may well be able to rollover. This helps
// in two ways: it compresses the journal files which frees up space, and if it ends up merging to a different
// partition/storage directory, we can delete the journals from this directory that ran out of space.
// In order to do this, though, we must switch from a read lock to a write lock.
// This part of the code gets a little bit messy, and we could potentially refactor it a bit in order to
// make the code cleaner.
readLock.unlock();
try {
writeLock.lock();
try {
logger.debug("Obtained write lock to rollover due to IOException on write");
rollover(true);
} finally {
writeLock.unlock();
}
} catch (final Exception e) {
logger.error("Failed to Rollover Provenance Event Repository file due to {}", e.toString());
logger.error("", e);
eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to Rollover Provenance Event Log due to " + e.toString());
} finally {
// we must re-lock the readLock, as the finally block below is going to unlock it.
readLock.lock();
}
return;
}
} finally {
readLock.unlock();
}
// If the total number of bytes written to the Journals is >= configured max, we need to roll over
if (totalJournalSize >= configuration.getMaxEventFileCapacity()) {
writeLock.lock();
try {
logger.debug("Obtained write lock to perform rollover based on file size");
// another thread may have just done it.
if (bytesWrittenSinceRollover.get() >= configuration.getMaxEventFileCapacity()) {
try {
rollover(false);
} catch (final IOException e) {
logger.error("Failed to Rollover Provenance Event Repository file due to {}", e.toString());
logger.error("", e);
eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to Rollover Provenance Event Log due to " + e.toString());
}
}
} finally {
writeLock.unlock();
}
}
}
use of org.apache.nifi.util.timebuffer.TimedCountSize in project nifi-minifi by apache.
the class MiNiFiPersistentProvenanceRepository method persistRecord.
private void persistRecord(final Iterable<ProvenanceEventRecord> records) {
final long totalJournalSize;
readLock.lock();
try {
long bytesWritten = 0L;
// obtain a lock on one of the RecordWriter's so that no other thread is able to write to this writer until we're finished.
// Although the writer itself is thread-safe, we need to generate an event id and then write the event
// atomically, so we need to do this with a lock.
boolean locked = false;
RecordWriter writer;
do {
final RecordWriter[] recordWriters = this.writers;
final int numDirty = dirtyWriterCount.get();
if (numDirty >= recordWriters.length) {
throw new IllegalStateException("Cannot update repository because all partitions are unusable at this time. Writing to the repository would cause corruption. " + "This most often happens as a result of the repository running out of disk space or the JVM running out of memory.");
}
final long idx = writerIndex.getAndIncrement();
writer = recordWriters[(int) (idx % recordWriters.length)];
locked = writer.tryLock();
} while (!locked);
try {
try {
long recordsWritten = 0L;
for (final ProvenanceEventRecord nextRecord : records) {
final StorageSummary persistedEvent = writer.writeRecord(nextRecord);
bytesWritten += persistedEvent.getSerializedLength();
recordsWritten++;
logger.trace("Wrote record with ID {} to {}", persistedEvent.getEventId(), writer);
}
writer.flush();
if (alwaysSync) {
writer.sync();
}
totalJournalSize = bytesWrittenSinceRollover.addAndGet(bytesWritten);
recordsWrittenSinceRollover.getAndIncrement();
this.updateCounts.add(new TimedCountSize(recordsWritten, bytesWritten));
} catch (final Throwable t) {
// We need to set the repoDirty flag before we release the lock for this journal.
// Otherwise, another thread may write to this journal -- this is a problem because
// the journal contains part of our record but not all of it. Writing to the end of this
// journal will result in corruption!
writer.markDirty();
dirtyWriterCount.incrementAndGet();
// force rollover to happen soon.
streamStartTime.set(0L);
throw t;
} finally {
writer.unlock();
}
} catch (final IOException ioe) {
// warn about the failure
logger.error("Failed to persist Provenance Event due to {}.", ioe.toString());
logger.error("", ioe);
eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to persist Provenance Event due to " + ioe.toString());
// Attempt to perform a rollover. An IOException in this part of the code generally is the result of
// running out of disk space. If we have multiple partitions, we may well be able to rollover. This helps
// in two ways: it compresses the journal files which frees up space, and if it ends up merging to a different
// partition/storage directory, we can delete the journals from this directory that ran out of space.
// In order to do this, though, we must switch from a read lock to a write lock.
// This part of the code gets a little bit messy, and we could potentially refactor it a bit in order to
// make the code cleaner.
readLock.unlock();
try {
writeLock.lock();
try {
logger.debug("Obtained write lock to rollover due to IOException on write");
rollover(true);
} finally {
writeLock.unlock();
}
} catch (final Exception e) {
logger.error("Failed to Rollover Provenance Event Repository file due to {}", e.toString());
logger.error("", e);
eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to Rollover Provenance Event Log due to " + e.toString());
} finally {
// we must re-lock the readLock, as the finally block below is going to unlock it.
readLock.lock();
}
return;
}
} finally {
readLock.unlock();
}
// If the total number of bytes written to the Journals is >= configured max, we need to roll over
if (totalJournalSize >= configuration.getMaxEventFileCapacity()) {
writeLock.lock();
try {
logger.debug("Obtained write lock to perform rollover based on file size");
// another thread may have just done it.
if (bytesWrittenSinceRollover.get() >= configuration.getMaxEventFileCapacity()) {
try {
rollover(false);
} catch (final IOException e) {
logger.error("Failed to Rollover Provenance Event Repository file due to {}", e.toString());
logger.error("", e);
eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to Rollover Provenance Event Log due to " + e.toString());
}
}
} finally {
writeLock.unlock();
}
}
}
use of org.apache.nifi.util.timebuffer.TimedCountSize in project nifi-minifi by apache.
the class MiNiFiPersistentProvenanceRepository method rollover.
/**
* <p>
* MUST be called with the write lock held.
* </p>
*
* Rolls over the data in the journal files, merging them into a single
* Provenance Event Log File, and compressing and indexing as needed.
*
* @param force if true, will force a rollover regardless of whether or not
* data has been written
* @throws IOException if unable to complete rollover
*/
private void rollover(final boolean force) throws IOException {
if (!configuration.isAllowRollover()) {
return;
}
// have written something to the stream, then roll over
if (force || recordsWrittenSinceRollover.get() > 0L || dirtyWriterCount.get() > 0) {
final List<File> journalsToMerge = new ArrayList<>();
for (final RecordWriter writer : writers) {
if (!writer.isClosed()) {
final File writerFile = writer.getFile();
journalsToMerge.add(writerFile);
try {
writer.close();
} catch (final IOException ioe) {
logger.warn("Failed to close {} due to {}", writer, ioe.toString());
if (logger.isDebugEnabled()) {
logger.warn("", ioe);
}
}
}
}
if (logger.isDebugEnabled()) {
if (journalsToMerge.isEmpty()) {
logger.debug("No journals to merge; all RecordWriters were already closed");
} else {
logger.debug("Going to merge {} files for journals starting with ID {}", journalsToMerge.size(), LuceneUtil.substringBefore(journalsToMerge.get(0).getName(), "."));
}
}
// Choose a storage directory to store the merged file in.
final long storageDirIdx = storageDirectoryIndex.getAndIncrement();
final List<File> storageDirs = new ArrayList<>(configuration.getStorageDirectories().values());
final File storageDir = storageDirs.get((int) (storageDirIdx % storageDirs.size()));
Future<?> future = null;
if (!journalsToMerge.isEmpty()) {
// Run the rollover logic in a background thread.
final AtomicReference<Future<?>> futureReference = new AtomicReference<>();
final AtomicInteger retryAttempts = new AtomicInteger(MAX_JOURNAL_ROLLOVER_RETRIES);
final int recordsWritten = recordsWrittenSinceRollover.getAndSet(0);
final Runnable rolloverRunnable = new Runnable() {
@Override
public void run() {
File fileRolledOver = null;
try {
try {
fileRolledOver = mergeJournals(journalsToMerge, getMergeFile(journalsToMerge, storageDir), eventReporter);
} catch (final IOException ioe) {
logger.error("Failed to merge Journal Files {} into a Provenance Log File due to {}", journalsToMerge, ioe.toString());
logger.error("", ioe);
}
if (fileRolledOver != null) {
final File file = fileRolledOver;
// update our map of id to Path
// We need to make sure that another thread doesn't also update the map at the same time. We cannot
// use the write lock when purging old events, and we want to use the same approach here.
boolean updated = false;
final Long fileFirstEventId = Long.valueOf(LuceneUtil.substringBefore(fileRolledOver.getName(), "."));
while (!updated) {
final SortedMap<Long, Path> existingPathMap = idToPathMap.get();
final SortedMap<Long, Path> newIdToPathMap = new TreeMap<>(new PathMapComparator());
newIdToPathMap.putAll(existingPathMap);
newIdToPathMap.put(fileFirstEventId, file.toPath());
updated = idToPathMap.compareAndSet(existingPathMap, newIdToPathMap);
}
final TimedCountSize countSize = updateCounts.getAggregateValue(System.currentTimeMillis() - TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES));
logger.info("Successfully Rolled over Provenance Event file containing {} records. In the past 5 minutes, " + "{} events have been written to the Provenance Repository, totaling {}", recordsWritten, countSize.getCount(), FormatUtils.formatDataSize(countSize.getSize()));
}
// if files were rolled over or if out of retries stop the future
if (fileRolledOver != null || retryAttempts.decrementAndGet() == 0) {
if (fileRolledOver == null && retryAttempts.get() == 0) {
logger.error("Failed to merge Journal Files {} after {} attempts.", journalsToMerge, MAX_JOURNAL_ROLLOVER_RETRIES);
}
rolloverCompletions.getAndIncrement();
// Cancel the future so that we don't run anymore
Future<?> future;
while ((future = futureReference.get()) == null) {
try {
Thread.sleep(10L);
} catch (final InterruptedException ie) {
}
}
future.cancel(false);
} else {
logger.warn("Couldn't merge journals. Will try again. journalsToMerge: {}, storageDir: {}", journalsToMerge, storageDir);
}
} catch (final Exception e) {
logger.error("Failed to merge journals. Will try again. journalsToMerge: {}, storageDir: {}, cause: {}", journalsToMerge, storageDir, e.toString());
logger.error("", e);
}
}
};
// We are going to schedule the future to run immediately and then repeat every 10 seconds. This allows us to keep retrying if we
// fail for some reason. When we succeed or if retries are exceeded, the Runnable will cancel itself.
future = rolloverExecutor.scheduleWithFixedDelay(rolloverRunnable, 0, getRolloverRetryMillis(), TimeUnit.MILLISECONDS);
futureReference.set(future);
}
streamStartTime.set(System.currentTimeMillis());
bytesWrittenSinceRollover.set(0);
// We don't want to create new 'writers' until the number of unmerged journals falls below our threshold. So we wait
// here before we repopulate the 'writers' member variable and release the lock.
int journalFileCount = getJournalCount();
long repoSize = getSize(getLogFiles(), 0L);
final int journalCountThreshold = configuration.getJournalCount() * 5;
// do not go over 10% of max capacity
final long sizeThreshold = (long) (configuration.getMaxStorageCapacity() * 1.1D);
// that is no longer the case.
if (journalFileCount > journalCountThreshold || repoSize > sizeThreshold) {
final long stopTheWorldStart = System.nanoTime();
logger.warn("The rate of the dataflow is exceeding the provenance recording rate. " + "Slowing down flow to accommodate. Currently, there are {} journal files ({} bytes) and " + "threshold for blocking is {} ({} bytes)", journalFileCount, repoSize, journalCountThreshold, sizeThreshold);
eventReporter.reportEvent(Severity.WARNING, "Provenance Repository", "The rate of the dataflow is " + "exceeding the provenance recording rate. Slowing down flow to accommodate");
while (journalFileCount > journalCountThreshold || repoSize > sizeThreshold) {
// if a shutdown happens while we are in this loop, kill the rollover thread and break
if (this.closed.get()) {
if (future != null) {
future.cancel(true);
}
break;
}
if (repoSize > sizeThreshold) {
logger.debug("Provenance Repository has exceeded its size threshold; will trigger purging of oldest events");
purgeOldEvents();
journalFileCount = getJournalCount();
repoSize = getSize(getLogFiles(), 0L);
continue;
} else {
// due to the runnable that we scheduled above
try {
Thread.sleep(100L);
} catch (final InterruptedException ie) {
}
}
logger.debug("Provenance Repository is still behind. Keeping flow slowed down " + "to accommodate. Currently, there are {} journal files ({} bytes) and " + "threshold for blocking is {} ({} bytes)", journalFileCount, repoSize, journalCountThreshold, sizeThreshold);
journalFileCount = getJournalCount();
repoSize = getSize(getLogFiles(), 0L);
}
final long stopTheWorldNanos = System.nanoTime() - stopTheWorldStart;
backpressurePauseMillis.add(new TimestampedLong(stopTheWorldNanos));
final TimestampedLong pauseNanosLastFiveMinutes = backpressurePauseMillis.getAggregateValue(System.currentTimeMillis() - TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES));
logger.info("Provenance Repository has now caught up with rolling over journal files. Current number of " + "journal files to be rolled over is {}. Provenance Repository Back Pressure paused Session commits for {} ({} total in the last 5 minutes).", journalFileCount, FormatUtils.formatNanos(stopTheWorldNanos, true), FormatUtils.formatNanos(pauseNanosLastFiveMinutes.getValue(), true));
}
// we've finished rolling over successfully. Create new writers and reset state.
writers = createWriters(configuration, idGenerator.get());
dirtyWriterCount.set(0);
streamStartTime.set(System.currentTimeMillis());
recordsWrittenSinceRollover.getAndSet(0);
}
}
Aggregations