Use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.
Class PersistentProvenanceRepository, method persistRecord.
private void persistRecord(final Iterable<ProvenanceEventRecord> records) {
    final long totalJournalSize;
    readLock.lock();
    try {
        long bytesWritten = 0L;

        // obtain a lock on one of the RecordWriters so that no other thread is able to write to this writer until we're finished.
        // Although the writer itself is thread-safe, we need to generate an event id and then write the event
        // atomically, so we need to do this with a lock.
        boolean locked = false;
        RecordWriter writer;
        do {
            final RecordWriter[] recordWriters = this.writers;
            final int numDirty = dirtyWriterCount.get();
            if (numDirty >= recordWriters.length) {
                throw new IllegalStateException("Cannot update repository because all partitions are unusable at this time. Writing to the repository would cause corruption. "
                    + "This most often happens as a result of the repository running out of disk space or the JVM running out of memory.");
            }

            final long idx = writerIndex.getAndIncrement();
            writer = recordWriters[(int) (idx % recordWriters.length)];
            locked = writer.tryLock();
        } while (!locked);

        try {
            try {
                long recordsWritten = 0L;
                for (final ProvenanceEventRecord nextRecord : records) {
                    final StorageSummary persistedEvent = writer.writeRecord(nextRecord);
                    bytesWritten += persistedEvent.getSerializedLength();
                    recordsWritten++;
                    logger.trace("Wrote record with ID {} to {}", persistedEvent.getEventId(), writer);
                }

                writer.flush();

                if (alwaysSync) {
                    writer.sync();
                }

                totalJournalSize = bytesWrittenSinceRollover.addAndGet(bytesWritten);
                recordsWrittenSinceRollover.getAndIncrement();
                this.updateCounts.add(new TimedCountSize(recordsWritten, bytesWritten));
            } catch (final Throwable t) {
                // We need to set the repoDirty flag before we release the lock for this journal.
                // Otherwise, another thread may write to this journal -- this is a problem because
                // the journal contains part of our record but not all of it. Writing to the end of this
                // journal will result in corruption!
                writer.markDirty();
                dirtyWriterCount.incrementAndGet();

                // force rollover to happen soon.
                streamStartTime.set(0L);
                throw t;
            } finally {
                writer.unlock();
            }
        } catch (final IOException ioe) {
            // warn about the failure
            logger.error("Failed to persist Provenance Event due to {}.", ioe.toString());
            logger.error("", ioe);
            eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to persist Provenance Event due to " + ioe.toString());

            // Attempt to perform a rollover. An IOException in this part of the code generally is the result of
            // running out of disk space. If we have multiple partitions, we may well be able to rollover. This helps
            // in two ways: it compresses the journal files, which frees up space, and if it ends up merging to a different
            // partition/storage directory, we can delete the journals from this directory that ran out of space.
            // In order to do this, though, we must switch from a read lock to a write lock.
            // This part of the code gets a little bit messy, and we could potentially refactor it a bit in order to
            // make the code cleaner.
            readLock.unlock();
            try {
                writeLock.lock();
                try {
                    logger.debug("Obtained write lock to rollover due to IOException on write");
                    rollover(true);
                } finally {
                    writeLock.unlock();
                }
            } catch (final Exception e) {
                logger.error("Failed to Rollover Provenance Event Repository file due to {}", e.toString());
                logger.error("", e);
                eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to Rollover Provenance Event Log due to " + e.toString());
            } finally {
                // we must re-lock the readLock, as the finally block below is going to unlock it.
                readLock.lock();
            }

            return;
        }
    } finally {
        readLock.unlock();
    }

    // If the total number of bytes written to the Journals is >= configured max, we need to roll over
    if (totalJournalSize >= configuration.getMaxEventFileCapacity()) {
        writeLock.lock();
        try {
            logger.debug("Obtained write lock to perform rollover based on file size");

            // Check the size again, because another thread may have just performed the rollover.
            if (bytesWrittenSinceRollover.get() >= configuration.getMaxEventFileCapacity()) {
                try {
                    rollover(false);
                } catch (final IOException e) {
                    logger.error("Failed to Rollover Provenance Event Repository file due to {}", e.toString());
                    logger.error("", e);
                    eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to Rollover Provenance Event Log due to " + e.toString());
                }
            }
        } finally {
            writeLock.unlock();
        }
    }
}
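For orientation, StorageSummary is the value object a RecordWriter hands back for each persisted event; persistRecord only consumes its getEventId() and getSerializedLength() accessors to tally throughput. Below is a minimal, self-contained sketch (not NiFi code) of that tallying pattern. The event IDs, file name, and byte counts are invented, and the five-argument constructor mirrors the one used later in EncryptedSchemaRecordWriter.

import org.apache.nifi.provenance.serialization.StorageSummary;

import java.util.Arrays;
import java.util.List;

public class StorageSummaryTally {

    public static void main(final String[] args) {
        // Hypothetical summaries, as a RecordWriter might return them for two events
        // written to the same journal file.
        final List<StorageSummary> persistedEvents = Arrays.asList(
            new StorageSummary(1L, "journals/1.prov", 0, 128L, 128L),
            new StorageSummary(2L, "journals/1.prov", 0, 256L, 384L));

        long bytesWritten = 0L;
        long recordsWritten = 0L;
        for (final StorageSummary persistedEvent : persistedEvents) {
            // Same accessors that persistRecord() relies on.
            bytesWritten += persistedEvent.getSerializedLength();
            recordsWritten++;
            System.out.println("Wrote record with ID " + persistedEvent.getEventId());
        }
        System.out.println(recordsWritten + " records, " + bytesWritten + " bytes");
    }
}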
Use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.
Class WriteAheadProvenanceRepository, method registerEvents.
@Override
public void registerEvents(final Iterable<ProvenanceEventRecord> events) {
    final StorageResult storageResult;
    try {
        storageResult = eventStore.addEvents(events);
    } catch (final IOException e) {
        logger.error("Failed to write events to the Event Store", e);
        eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to write Provenance Events to the repository. See logs for more details.");
        return;
    }

    final Map<ProvenanceEventRecord, StorageSummary> locationMap = storageResult.getStorageLocations();
    if (!locationMap.isEmpty()) {
        eventIndex.addEvents(locationMap);
    }
}
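The only StorageSummary-specific work here is the guard on the location map: indexing is skipped when the store reports no storage locations. A small sketch of that hand-off pattern follows; the IndexingSink interface is a hypothetical stand-in for NiFi's EventIndex, introduced only to keep the example self-contained.

import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.serialization.StorageSummary;

import java.util.HashMap;
import java.util.Map;

public class LocationMapForwarder {

    // Hypothetical stand-in for the EventIndex used above.
    interface IndexingSink {
        void addEvents(Map<ProvenanceEventRecord, StorageSummary> locations);
    }

    static void forward(final Map<ProvenanceEventRecord, StorageSummary> locationMap, final IndexingSink index) {
        // An empty map means nothing was stored (or the store does not report locations),
        // so there is nothing to index.
        if (!locationMap.isEmpty()) {
            index.addEvents(locationMap);
        }
    }

    public static void main(final String[] args) {
        forward(new HashMap<>(), locations -> System.out.println("Indexing " + locations.size() + " events"));
    }
}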
Use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.
Class WriteAheadStorePartition, method reindexLatestEvents.
void reindexLatestEvents(final EventIndex eventIndex) {
    final List<File> eventFiles = getEventFilesFromDisk().sorted(DirectoryUtils.SMALLEST_ID_FIRST).collect(Collectors.toList());
    if (eventFiles.isEmpty()) {
        return;
    }

    final long minEventIdToReindex = eventIndex.getMinimumEventIdToReindex(partitionName);
    final long maxEventId = getMaxEventId();
    final long eventsToReindex = maxEventId - minEventIdToReindex;

    logger.info("The last Provenance Event indexed for partition {} is {}, but the last event written to partition has ID {}. "
        + "Re-indexing up to the last {} events to ensure that the Event Index is accurate and up-to-date for partition {}",
        partitionName, minEventIdToReindex, maxEventId, eventsToReindex, partitionDirectory);

    // Find the first event file that we care about.
    int firstEventFileIndex = 0;
    for (int i = eventFiles.size() - 1; i >= 0; i--) {
        final File eventFile = eventFiles.get(i);
        final long minIdInFile = DirectoryUtils.getMinId(eventFile);
        if (minIdInFile <= minEventIdToReindex) {
            firstEventFileIndex = i;
            break;
        }
    }

    // Create a subList that contains the files of interest.
    final List<File> eventFilesToReindex = eventFiles.subList(firstEventFileIndex, eventFiles.size());

    final ExecutorService executor = Executors.newFixedThreadPool(Math.min(4, eventFilesToReindex.size()), new NamedThreadFactory("Re-Index Provenance Events", true));
    final List<Future<?>> futures = new ArrayList<>(eventFilesToReindex.size());
    final AtomicLong reindexedCount = new AtomicLong(0L);

    // Re-Index the last bunch of events.
    // We don't use an Event Iterator here because it's possible that one of the event files could be corrupt (for example, if NiFi dies while
    // writing to the file, a record may be incomplete). We don't want that to prevent us from moving on and continuing to index the rest of the
    // un-indexed events. So we just use a List of files and create a reader for each one.
    final long start = System.nanoTime();
    int fileCount = 0;
    for (final File eventFile : eventFilesToReindex) {
        final boolean skipToEvent;
        if (fileCount++ == 0) {
            skipToEvent = true;
        } else {
            skipToEvent = false;
        }

        final Runnable reindexTask = new Runnable() {
            @Override
            public void run() {
                final Map<ProvenanceEventRecord, StorageSummary> storageMap = new HashMap<>(1000);

                try (final RecordReader recordReader = recordReaderFactory.newRecordReader(eventFile, Collections.emptyList(), Integer.MAX_VALUE)) {
                    if (skipToEvent) {
                        final Optional<ProvenanceEventRecord> eventOption = recordReader.skipToEvent(minEventIdToReindex);
                        if (!eventOption.isPresent()) {
                            return;
                        }
                    }

                    StandardProvenanceEventRecord event = null;
                    while (true) {
                        final long startBytesConsumed = recordReader.getBytesConsumed();

                        event = recordReader.nextRecord();
                        if (event == null) {
                            eventIndex.reindexEvents(storageMap);
                            reindexedCount.addAndGet(storageMap.size());
                            storageMap.clear();
                            // stop reading from this file
                            break;
                        } else {
                            final long eventSize = recordReader.getBytesConsumed() - startBytesConsumed;
                            storageMap.put(event, new StorageSummary(event.getEventId(), eventFile.getName(), partitionName, recordReader.getBlockIndex(), eventSize, 0L));

                            if (storageMap.size() == 1000) {
                                eventIndex.reindexEvents(storageMap);
                                reindexedCount.addAndGet(storageMap.size());
                                storageMap.clear();
                            }
                        }
                    }
                } catch (final EOFException eof) {
                    // Ran out of data. Continue on.
                    logger.warn("Failed to find event with ID {} in Event File {} due to {}", minEventIdToReindex, eventFile, eof.toString());
                } catch (final Exception e) {
                    logger.error("Failed to index Provenance Events found in {}", eventFile, e);
                }
            }
        };

        futures.add(executor.submit(reindexTask));
    }

    for (final Future<?> future : futures) {
        try {
            future.get();
        } catch (final ExecutionException ee) {
            logger.error("Failed to re-index some Provenance events. These events may not be query-able via the Provenance interface", ee.getCause());
        } catch (final InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.error("Interrupted while waiting for Provenance events to be re-indexed", e);
            break;
        }
    }

    try {
        eventIndex.commitChanges(partitionName);
    } catch (final IOException e) {
        logger.error("Failed to re-index Provenance Events for partition " + partitionName, e);
    }

    executor.shutdown();

    final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
    final long seconds = millis / 1000L;
    final long millisRemainder = millis % 1000L;
    logger.info("Finished re-indexing {} events across {} files for {} in {}.{} seconds",
        reindexedCount.get(), eventFilesToReindex.size(), partitionDirectory, seconds, millisRemainder);
}
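The detail worth noting is how each StorageSummary is assembled during re-indexing: the event's size is the delta in the reader's bytes-consumed counter around nextRecord(), and the six-argument constructor also carries the file name, partition name, and block index. The sketch below isolates that bookkeeping with hand-made offsets; the offset table and all figures are invented, and the map is keyed by event ID rather than by ProvenanceEventRecord purely to keep the example short.

import org.apache.nifi.provenance.serialization.StorageSummary;

import java.util.LinkedHashMap;
import java.util.Map;

public class ReindexSummarySketch {

    // Hypothetical (eventId, bytes consumed before, bytes consumed after) triples,
    // standing in for what the RecordReader reports around each nextRecord() call.
    private static final long[][] EVENT_OFFSETS = {
        {100L, 0L, 180L},
        {101L, 180L, 410L}
    };

    public static void main(final String[] args) {
        final Map<Long, StorageSummary> storageMap = new LinkedHashMap<>();
        for (final long[] offsets : EVENT_OFFSETS) {
            final long eventId = offsets[0];
            final long eventSize = offsets[2] - offsets[1]; // bytes-consumed delta = serialized length
            storageMap.put(eventId,
                new StorageSummary(eventId, "1000.prov.gz", "partition-1", 0, eventSize, 0L));
        }

        storageMap.values().forEach(summary ->
            System.out.println("Event " + summary.getEventId() + " is " + summary.getSerializedLength() + " bytes"));
    }
}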
Use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.
Class EncryptedSchemaRecordWriter, method writeRecord.
@Override
public StorageSummary writeRecord(final ProvenanceEventRecord record) throws IOException {
    final long encryptStart = System.nanoTime();
    byte[] cipherBytes;
    try {
        byte[] serialized;
        try (final ByteArrayOutputStream baos = new ByteArrayOutputStream(256);
             final DataOutputStream dos = new DataOutputStream(baos)) {
            writeRecord(record, 0L, dos);
            serialized = baos.toByteArray();
        }
        String eventId = record.getBestEventIdentifier();
        cipherBytes = encrypt(serialized, eventId);
    } catch (EncryptionException e) {
        logger.error("Encountered an error: ", e);
        throw new IOException("Error encrypting the provenance record", e);
    }
    final long encryptStop = System.nanoTime();

    final long lockStart = System.nanoTime();
    final long writeStart;
    final long startBytes;
    final long endBytes;
    final long recordIdentifier;
    synchronized (this) {
        writeStart = System.nanoTime();
        try {
            recordIdentifier = record.getEventId() == -1L ? getIdGenerator().getAndIncrement() : record.getEventId();
            startBytes = getBytesWritten();

            ensureStreamState(recordIdentifier, startBytes);

            final DataOutputStream out = getBufferedOutputStream();
            final int recordIdOffset = (int) (recordIdentifier - getFirstEventId());
            out.writeInt(recordIdOffset);
            out.writeInt(cipherBytes.length);
            out.write(cipherBytes);

            getRecordCount().incrementAndGet();
            endBytes = getBytesWritten();
        } catch (final IOException ioe) {
            markDirty();
            throw ioe;
        }
    }

    if (logger.isDebugEnabled()) {
        // Collect stats and periodically dump them if log level is set to at least info.
        final long writeNanos = System.nanoTime() - writeStart;
        getWriteTimes().add(new TimestampedLong(writeNanos));

        final long serializeNanos = lockStart - encryptStart;
        getSerializeTimes().add(new TimestampedLong(serializeNanos));

        final long encryptNanos = encryptStop - encryptStart;
        getEncryptTimes().add(new TimestampedLong(encryptNanos));

        final long lockNanos = writeStart - lockStart;
        getLockTimes().add(new TimestampedLong(lockNanos));

        getBytesWrittenBuffer().add(new TimestampedLong(endBytes - startBytes));

        final long recordCount = getTotalRecordCount().incrementAndGet();
        if (recordCount % debugFrequency == 0) {
            printStats();
        }
    }

    final long serializedLength = endBytes - startBytes;
    final TocWriter tocWriter = getTocWriter();
    final Integer blockIndex = tocWriter == null ? null : tocWriter.getCurrentBlockIndex();
    final File file = getFile();
    final String storageLocation = file.getParentFile().getName() + "/" + file.getName();
    return new StorageSummary(recordIdentifier, storageLocation, blockIndex, serializedLength, endBytes);
}
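The returned StorageSummary is assembled entirely from the writer's own bookkeeping: the storage location is the journal's parent-directory name plus file name, and the serialized length is the difference in bytes written before and after the event. A small sketch of that assembly (not the real writer, and with invented byte counts) follows.

import org.apache.nifi.provenance.serialization.StorageSummary;

import java.io.File;

public class WriteSummarySketch {

    static StorageSummary summarize(final long eventId, final File journalFile, final Integer blockIndex,
                                    final long startBytes, final long endBytes) {
        // "<partition directory>/<journal file>" -- the same convention used above.
        final String storageLocation = journalFile.getParentFile().getName() + "/" + journalFile.getName();
        return new StorageSummary(eventId, storageLocation, blockIndex, endBytes - startBytes, endBytes);
    }

    public static void main(final String[] args) {
        final StorageSummary summary = summarize(42L, new File("/repo/partition-1/42.prov"), 0, 1024L, 1536L);
        System.out.println("Event " + summary.getEventId() + " -> " + summary.getSerializedLength() + " bytes");
    }
}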
Use of org.apache.nifi.provenance.serialization.StorageSummary in project kylo by Teradata.
Class KyloRecordWriterDelegate, method writeRecord.
@Override
public StorageSummary writeRecord(ProvenanceEventRecord provenanceEventRecord) throws IOException {
    StorageSummary storageSummary = recordWriter.writeRecord(provenanceEventRecord);
    // record it to the queue
    FeedStatisticsManager.getInstance().addEvent(provenanceEventRecord, storageSummary.getEventId());
    return storageSummary;
}
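Kylo's delegate is a plain decorator: it lets the wrapped writer persist the event, then peels the assigned event ID off the returned StorageSummary for its own statistics. The sketch below shows the same idea with a hypothetical single-method Writer interface and EventSink standing in for NiFi's RecordWriter and Kylo's FeedStatisticsManager.

import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.serialization.StorageSummary;

import java.io.IOException;

public class DelegatingWriterSketch {

    // Hypothetical, trimmed-down interfaces used only for this sketch.
    interface Writer {
        StorageSummary writeRecord(ProvenanceEventRecord record) throws IOException;
    }

    interface EventSink {
        void addEvent(ProvenanceEventRecord record, long eventId);
    }

    private final Writer delegate;
    private final EventSink statistics;

    DelegatingWriterSketch(final Writer delegate, final EventSink statistics) {
        this.delegate = delegate;
        this.statistics = statistics;
    }

    StorageSummary writeRecord(final ProvenanceEventRecord record) throws IOException {
        // Let the real writer persist the event, then report the ID it was assigned.
        final StorageSummary summary = delegate.writeRecord(record);
        statistics.addEvent(record, summary.getEventId());
        return summary;
    }
}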