use of org.apache.nifi.util.timebuffer.TimestampedLong in project nifi by apache.
the class EventIdFirstSchemaRecordWriter method writeRecord.
@Override
public StorageSummary writeRecord(final ProvenanceEventRecord record) throws IOException {
if (isDirty()) {
throw new IOException("Cannot update Provenance Repository because this Record Writer has already failed to write to the Repository");
}
final long lockStart;
final long writeStart;
final long startBytes;
final long endBytes;
final long recordIdentifier;
final long serializeStart = System.nanoTime();
final ByteArrayDataOutputStream bados = streamCache.checkOut();
try {
writeRecord(record, 0L, bados.getDataOutputStream());
lockStart = System.nanoTime();
synchronized (this) {
writeStart = System.nanoTime();
try {
recordIdentifier = record.getEventId() == -1L ? getIdGenerator().getAndIncrement() : record.getEventId();
startBytes = getBytesWritten();
ensureStreamState(recordIdentifier, startBytes);
final DataOutputStream out = getBufferedOutputStream();
final int recordIdOffset = (int) (recordIdentifier - firstEventId);
out.writeInt(recordIdOffset);
final ByteArrayOutputStream baos = bados.getByteArrayOutputStream();
out.writeInt(baos.size());
baos.writeTo(out);
recordCount.incrementAndGet();
endBytes = getBytesWritten();
} catch (final IOException ioe) {
markDirty();
throw ioe;
}
}
} finally {
streamCache.checkIn(bados);
}
if (logger.isDebugEnabled()) {
// Collect stats and periodically dump them if log level is set to at least info.
final long writeNanos = System.nanoTime() - writeStart;
writeTimes.add(new TimestampedLong(writeNanos));
final long serializeNanos = lockStart - serializeStart;
serializeTimes.add(new TimestampedLong(serializeNanos));
final long lockNanos = writeStart - lockStart;
lockTimes.add(new TimestampedLong(lockNanos));
bytesWritten.add(new TimestampedLong(endBytes - startBytes));
final long recordCount = totalRecordCount.incrementAndGet();
if (recordCount % 1_000_000 == 0) {
final long sixtySecondsAgo = System.currentTimeMillis() - 60000L;
final Long writeNanosLast60 = writeTimes.getAggregateValue(sixtySecondsAgo).getValue();
final Long lockNanosLast60 = lockTimes.getAggregateValue(sixtySecondsAgo).getValue();
final Long serializeNanosLast60 = serializeTimes.getAggregateValue(sixtySecondsAgo).getValue();
final Long bytesWrittenLast60 = bytesWritten.getAggregateValue(sixtySecondsAgo).getValue();
logger.debug("In the last 60 seconds, have spent {} millis writing to file ({} MB), {} millis waiting on synchronize block, {} millis serializing events", TimeUnit.NANOSECONDS.toMillis(writeNanosLast60), bytesWrittenLast60 / 1024 / 1024, TimeUnit.NANOSECONDS.toMillis(lockNanosLast60), TimeUnit.NANOSECONDS.toMillis(serializeNanosLast60));
}
}
final long serializedLength = endBytes - startBytes;
final TocWriter tocWriter = getTocWriter();
final Integer blockIndex = tocWriter == null ? null : tocWriter.getCurrentBlockIndex();
final File file = getFile();
final String storageLocation = file.getParentFile().getName() + "/" + file.getName();
return new StorageSummary(recordIdentifier, storageLocation, blockIndex, serializedLength, endBytes);
}
use of org.apache.nifi.util.timebuffer.TimestampedLong in project nifi by apache.
the class PersistentProvenanceRepository method rollover.
/**
* <p>
* MUST be called with the write lock held.
* </p>
*
* Rolls over the data in the journal files, merging them into a single
* Provenance Event Log File, and compressing and indexing as needed.
*
* @param force if true, will force a rollover regardless of whether or not
* data has been written
* @throws IOException if unable to complete rollover
*/
private void rollover(final boolean force) throws IOException {
if (!configuration.isAllowRollover()) {
return;
}
// have written something to the stream, then roll over
if (force || recordsWrittenSinceRollover.get() > 0L || dirtyWriterCount.get() > 0) {
final List<File> journalsToMerge = new ArrayList<>();
for (final RecordWriter writer : writers) {
if (!writer.isClosed()) {
final File writerFile = writer.getFile();
journalsToMerge.add(writerFile);
try {
writer.close();
} catch (final IOException ioe) {
logger.warn("Failed to close {} due to {}", writer, ioe.toString());
if (logger.isDebugEnabled()) {
logger.warn("", ioe);
}
}
}
}
if (logger.isDebugEnabled()) {
if (journalsToMerge.isEmpty()) {
logger.debug("No journals to merge; all RecordWriters were already closed");
} else {
logger.debug("Going to merge {} files for journals starting with ID {}", journalsToMerge.size(), LuceneUtil.substringBefore(journalsToMerge.get(0).getName(), "."));
}
}
// Choose a storage directory to store the merged file in.
final long storageDirIdx = storageDirectoryIndex.getAndIncrement();
final List<File> storageDirs = new ArrayList<>(configuration.getStorageDirectories().values());
final File storageDir = storageDirs.get((int) (storageDirIdx % storageDirs.size()));
Future<?> future = null;
if (!journalsToMerge.isEmpty()) {
// Run the rollover logic in a background thread.
final AtomicReference<Future<?>> futureReference = new AtomicReference<>();
final AtomicInteger retryAttempts = new AtomicInteger(MAX_JOURNAL_ROLLOVER_RETRIES);
final int recordsWritten = recordsWrittenSinceRollover.getAndSet(0);
final Runnable rolloverRunnable = new Runnable() {
@Override
public void run() {
File fileRolledOver = null;
try {
try {
fileRolledOver = mergeJournals(journalsToMerge, getMergeFile(journalsToMerge, storageDir), eventReporter);
} catch (final IOException ioe) {
logger.error("Failed to merge Journal Files {} into a Provenance Log File due to {}", journalsToMerge, ioe.toString());
logger.error("", ioe);
}
if (fileRolledOver != null) {
final File file = fileRolledOver;
// update our map of id to Path
// We need to make sure that another thread doesn't also update the map at the same time. We cannot
// use the write lock when purging old events, and we want to use the same approach here.
boolean updated = false;
final Long fileFirstEventId = Long.valueOf(LuceneUtil.substringBefore(fileRolledOver.getName(), "."));
while (!updated) {
final SortedMap<Long, Path> existingPathMap = idToPathMap.get();
final SortedMap<Long, Path> newIdToPathMap = new TreeMap<>(new PathMapComparator());
newIdToPathMap.putAll(existingPathMap);
newIdToPathMap.put(fileFirstEventId, file.toPath());
updated = idToPathMap.compareAndSet(existingPathMap, newIdToPathMap);
}
final TimedCountSize countSize = updateCounts.getAggregateValue(System.currentTimeMillis() - TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES));
logger.info("Successfully Rolled over Provenance Event file containing {} records. In the past 5 minutes, " + "{} events have been written to the Provenance Repository, totaling {}", recordsWritten, countSize.getCount(), FormatUtils.formatDataSize(countSize.getSize()));
}
// if files were rolled over or if out of retries stop the future
if (fileRolledOver != null || retryAttempts.decrementAndGet() == 0) {
if (fileRolledOver == null && retryAttempts.get() == 0) {
logger.error("Failed to merge Journal Files {} after {} attempts.", journalsToMerge, MAX_JOURNAL_ROLLOVER_RETRIES);
}
rolloverCompletions.getAndIncrement();
// Cancel the future so that we don't run anymore
Future<?> future;
while ((future = futureReference.get()) == null) {
try {
Thread.sleep(10L);
} catch (final InterruptedException ie) {
}
}
future.cancel(false);
} else {
logger.warn("Couldn't merge journals. Will try again. journalsToMerge: {}, storageDir: {}", journalsToMerge, storageDir);
}
} catch (final Exception e) {
logger.error("Failed to merge journals. Will try again. journalsToMerge: {}, storageDir: {}, cause: {}", journalsToMerge, storageDir, e.toString());
logger.error("", e);
}
}
};
// We are going to schedule the future to run immediately and then repeat every 10 seconds. This allows us to keep retrying if we
// fail for some reason. When we succeed or if retries are exceeded, the Runnable will cancel itself.
future = rolloverExecutor.scheduleWithFixedDelay(rolloverRunnable, 0, getRolloverRetryMillis(), TimeUnit.MILLISECONDS);
futureReference.set(future);
}
streamStartTime.set(System.currentTimeMillis());
bytesWrittenSinceRollover.set(0);
// We don't want to create new 'writers' until the number of unmerged journals falls below our threshold. So we wait
// here before we repopulate the 'writers' member variable and release the lock.
int journalFileCount = getJournalCount();
long repoSize = getSize(getLogFiles(), 0L);
final int journalCountThreshold = configuration.getJournalCount() * 5;
final long sizeThreshold = (long) (configuration.getMaxStorageCapacity() * ROLLOVER_HIGH_WATER);
// that is no longer the case.
if (journalFileCount > journalCountThreshold || repoSize > sizeThreshold) {
final long stopTheWorldStart = System.nanoTime();
logger.warn("The rate of the dataflow is exceeding the provenance recording rate. " + "Slowing down flow to accommodate. Currently, there are {} journal files ({} bytes) and " + "threshold for blocking is {} ({} bytes)", journalFileCount, repoSize, journalCountThreshold, sizeThreshold);
eventReporter.reportEvent(Severity.WARNING, "Provenance Repository", "The rate of the dataflow is " + "exceeding the provenance recording rate. Slowing down flow to accommodate");
while (journalFileCount > journalCountThreshold || repoSize > sizeThreshold) {
// if a shutdown happens while we are in this loop, kill the rollover thread and break
if (this.closed.get()) {
if (future != null) {
future.cancel(true);
}
break;
}
if (repoSize > sizeThreshold) {
logger.debug("Provenance Repository has exceeded its size threshold; will trigger purging of oldest events");
purgeOldEvents();
journalFileCount = getJournalCount();
repoSize = getSize(getLogFiles(), 0L);
continue;
} else {
// due to the runnable that we scheduled above
try {
Thread.sleep(100L);
} catch (final InterruptedException ie) {
}
}
logger.debug("Provenance Repository is still behind. Keeping flow slowed down " + "to accommodate. Currently, there are {} journal files ({} bytes) and " + "threshold for blocking is {} ({} bytes)", journalFileCount, repoSize, journalCountThreshold, sizeThreshold);
journalFileCount = getJournalCount();
repoSize = getSize(getLogFiles(), 0L);
}
final long stopTheWorldNanos = System.nanoTime() - stopTheWorldStart;
backpressurePauseMillis.add(new TimestampedLong(stopTheWorldNanos));
final TimestampedLong pauseNanosLastFiveMinutes = backpressurePauseMillis.getAggregateValue(System.currentTimeMillis() - TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES));
logger.info("Provenance Repository has now caught up with rolling over journal files. Current number of " + "journal files to be rolled over is {}. Provenance Repository Back Pressure paused Session commits for {} ({} total in the last 5 minutes).", journalFileCount, FormatUtils.formatNanos(stopTheWorldNanos, true), FormatUtils.formatNanos(pauseNanosLastFiveMinutes.getValue(), true));
}
// we've finished rolling over successfully. Create new writers and reset state.
writers = createWriters(configuration, idGenerator.get());
dirtyWriterCount.set(0);
streamStartTime.set(System.currentTimeMillis());
recordsWrittenSinceRollover.getAndSet(0);
}
}
use of org.apache.nifi.util.timebuffer.TimestampedLong in project nifi by apache.
the class LuceneEventIndex method incrementAndReportStats.
private void incrementAndReportStats() {
final long fiveMinutesAgo = System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(5);
final TimestampedLong nanosLastFive = queuePauseNanos.getAggregateValue(fiveMinutesAgo);
if (nanosLastFive == null) {
return;
}
final TimestampedLong eventsLast5 = eventsIndexed.getAggregateValue(fiveMinutesAgo);
if (eventsLast5 == null) {
return;
}
final long numEventsLast5 = eventsLast5.getValue();
final long millis = TimeUnit.NANOSECONDS.toMillis(nanosLastFive.getValue());
logger.debug("In the last 5 minutes, have spent {} CPU-millis waiting to enqueue events for indexing and have indexed {} events ({} since NiFi started)", millis, numEventsLast5, eventCount.get());
}
use of org.apache.nifi.util.timebuffer.TimestampedLong in project nifi by apache.
the class EncryptedSchemaRecordWriter method writeRecord.
@Override
public StorageSummary writeRecord(final ProvenanceEventRecord record) throws IOException {
final long encryptStart = System.nanoTime();
byte[] cipherBytes;
try {
byte[] serialized;
try (final ByteArrayOutputStream baos = new ByteArrayOutputStream(256);
final DataOutputStream dos = new DataOutputStream(baos)) {
writeRecord(record, 0L, dos);
serialized = baos.toByteArray();
}
String eventId = record.getBestEventIdentifier();
cipherBytes = encrypt(serialized, eventId);
} catch (EncryptionException e) {
logger.error("Encountered an error: ", e);
throw new IOException("Error encrypting the provenance record", e);
}
final long encryptStop = System.nanoTime();
final long lockStart = System.nanoTime();
final long writeStart;
final long startBytes;
final long endBytes;
final long recordIdentifier;
synchronized (this) {
writeStart = System.nanoTime();
try {
recordIdentifier = record.getEventId() == -1L ? getIdGenerator().getAndIncrement() : record.getEventId();
startBytes = getBytesWritten();
ensureStreamState(recordIdentifier, startBytes);
final DataOutputStream out = getBufferedOutputStream();
final int recordIdOffset = (int) (recordIdentifier - getFirstEventId());
out.writeInt(recordIdOffset);
out.writeInt(cipherBytes.length);
out.write(cipherBytes);
getRecordCount().incrementAndGet();
endBytes = getBytesWritten();
} catch (final IOException ioe) {
markDirty();
throw ioe;
}
}
if (logger.isDebugEnabled()) {
// Collect stats and periodically dump them if log level is set to at least info.
final long writeNanos = System.nanoTime() - writeStart;
getWriteTimes().add(new TimestampedLong(writeNanos));
final long serializeNanos = lockStart - encryptStart;
getSerializeTimes().add(new TimestampedLong(serializeNanos));
final long encryptNanos = encryptStop - encryptStart;
getEncryptTimes().add(new TimestampedLong(encryptNanos));
final long lockNanos = writeStart - lockStart;
getLockTimes().add(new TimestampedLong(lockNanos));
getBytesWrittenBuffer().add(new TimestampedLong(endBytes - startBytes));
final long recordCount = getTotalRecordCount().incrementAndGet();
if (recordCount % debugFrequency == 0) {
printStats();
}
}
final long serializedLength = endBytes - startBytes;
final TocWriter tocWriter = getTocWriter();
final Integer blockIndex = tocWriter == null ? null : tocWriter.getCurrentBlockIndex();
final File file = getFile();
final String storageLocation = file.getParentFile().getName() + "/" + file.getName();
return new StorageSummary(recordIdentifier, storageLocation, blockIndex, serializedLength, endBytes);
}
use of org.apache.nifi.util.timebuffer.TimestampedLong in project nifi by apache.
the class LuceneEventIndex method addEvent.
protected void addEvent(final ProvenanceEventRecord event, final StorageSummary location) {
for (final CachedQuery cachedQuery : cachedQueries) {
cachedQuery.update(event, location);
}
final Document document = eventConverter.convert(event, location);
if (document == null) {
logger.debug("Received Provenance Event {} to index but it contained no information that should be indexed, so skipping it", event.getEventId());
} else {
final StoredDocument doc = new StoredDocument(document, location);
boolean added = false;
while (!added && !closed) {
added = documentQueue.offer(doc);
if (!added) {
final long start = System.nanoTime();
try {
added = documentQueue.offer(doc, 1, TimeUnit.SECONDS);
} catch (final InterruptedException e) {
Thread.currentThread().interrupt();
logger.warn("Interrupted while attempting to enqueue Provenance Event for indexing; this event will not be indexed");
return;
}
final long nanos = System.nanoTime() - start;
queuePauseNanos.add(new TimestampedLong(nanos));
}
if (added) {
final long totalEventCount = eventCount.incrementAndGet();
if (totalEventCount % 1_000_000 == 0 && logger.isDebugEnabled()) {
incrementAndReportStats();
}
}
}
}
}
Aggregations