Use of org.apache.nifi.provenance.serialization.StorageSummary in the Apache NiFi project.
Class TestEventIndexTask, method testIndexWriterCommittedWhenAppropriate.
/**
 * Checks when an EventIndexTask commits its index writer. Commits are observed through
 * the commitCount counter, which the test asserts on as StoredDocuments are built.
 *
 * NOTE(review): this excerpt looks truncated — braces are unbalanced, and the body of the
 * commit override plus the statements that would start the threads, enqueue documents and
 * wait are not visible. Confirm against the full source before relying on the flow below.
 *
 * @throws IOException declared by the test signature
 * @throws InterruptedException declared by the test signature
 */
@Test(timeout = 5000)
public void testIndexWriterCommittedWhenAppropriate() throws IOException, InterruptedException {
// Queue handed to the EventIndexTask constructor below.
final BlockingQueue<StoredDocument> docQueue = new LinkedBlockingQueue<>();
final RepositoryConfiguration repoConfig = new RepositoryConfiguration();
final File storageDir = new File("target/storage/TestEventIndexTask/1");
// Register the directory under the name "1", the same name used in the StorageSummary objects below.
repoConfig.addStorageDirectory("1", storageDir);
// Counter the assertions below use to check how many commits have happened.
final AtomicInteger commitCount = new AtomicInteger(0);
// Mock out an IndexWriter and keep track of the number of events that are indexed.
final IndexWriter indexWriter = Mockito.mock(IndexWriter.class);
final EventIndexWriter eventIndexWriter = new LuceneEventIndexWriter(indexWriter, storageDir);
final IndexManager indexManager = Mockito.mock(IndexManager.class);
final IndexDirectoryManager directoryManager = new IndexDirectoryManager(repoConfig);
// Create an EventIndexTask and override the commit(IndexWriter) method so that we can keep track of how
// many times the index writer gets committed.
// NOTE(review): 201 is presumably the commit threshold in events (the test expects no commit
// while fewer than 201 have been indexed) — confirm against the EventIndexTask constructor.
final EventIndexTask task = new EventIndexTask(docQueue, repoConfig, indexManager, directoryManager, 201, EventReporter.NO_OP) {
// NOTE(review): the override body is not visible in this excerpt.
protected void commit(EventIndexWriter indexWriter) throws IOException {
// Create 4 threads, each one a daemon thread running the EventIndexTask
for (int i = 0; i < 4; i++) {
final Thread t = new Thread(task);
// Nothing has been indexed yet, so no commit is expected.
assertEquals(0, commitCount.get());
// Index 100 documents with a storage filename of "0.0.prov"
for (int i = 0; i < 100; i++) {
final Document document = new Document();
document.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), System.currentTimeMillis(), Store.NO));
final StorageSummary location = new StorageSummary(1L, "0.0.prov", "1", 0, 1000L, 1000L);
final StoredDocument storedDoc = new StoredDocument(document, location);
// Still no commit expected after the first batch of 100.
assertEquals(0, commitCount.get());
// Index 100 documents
for (int i = 0; i < 100; i++) {
final Document document = new Document();
document.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), System.currentTimeMillis(), Store.NO));
final StorageSummary location = new StorageSummary(1L, "0.0.prov", "1", 0, 1000L, 1000L);
final StoredDocument storedDoc = new StoredDocument(document, location);
// Wait until we've indexed all 200 events
while (eventIndexWriter.getEventsIndexed() < 200) {
// Wait a bit and make sure that we still haven't committed the index writer.
assertEquals(0, commitCount.get());
// Add another document.
final Document document = new Document();
document.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), System.currentTimeMillis(), Store.NO));
final StorageSummary location = new StorageSummary(1L, "0.0.prov", "1", 0, 1000L, 1000L);
StoredDocument storedDoc = new StoredDocument(document, location);
// Wait until index writer is committed.
while (commitCount.get() == 0) {
// Exactly one commit is expected once the loop above exits.
assertEquals(1, commitCount.get());
// Add a new IndexableDocument with a count of 1 to ensure that the writer is committed again.
storedDoc = new StoredDocument(document, location);
assertEquals(1, commitCount.get());
// Add a new IndexableDocument with a count of 3. Index writer should not be committed again.
storedDoc = new StoredDocument(document, location);
assertEquals(1, commitCount.get());
Use of org.apache.nifi.provenance.serialization.StorageSummary in the Apache NiFi project.
Class ArrayListEventStore, method addEvents.
/**
 * Assigns a StorageSummary to each supplied event and returns the mapping wrapped in a
 * StorageResult that reports no rollover.
 *
 * NOTE(review): this excerpt looks truncated — closing braces are missing, so statements
 * from the original method may have been dropped. Confirm against the full source.
 *
 * @param events the events to add
 * @return a StorageResult whose getStorageLocations() exposes the event-to-summary map
 */
public synchronized StorageResult addEvents(Iterable<ProvenanceEventRecord> events) {
final Map<ProvenanceEventRecord, StorageSummary> storageLocations = new HashMap<>();
// NOTE(review): the ';' after '{' below is an empty statement — harmless, but likely a typo.
for (final ProvenanceEventRecord event : events) {;
// Each event gets the next id from idGenerator; the remaining summary fields are fixed placeholder values.
final StorageSummary storageSummary = new StorageSummary(idGenerator.getAndIncrement(), "location", "1", 1, 0L, 0L);
storageLocations.put(event, storageSummary);
// Anonymous StorageResult backed by the map built above.
return new StorageResult() {
public Map<ProvenanceEventRecord, StorageSummary> getStorageLocations() {
return storageLocations;
// This implementation always reports that no rollover was triggered.
public boolean triggeredRollover() {
return false;
// Returns null rather than a count.
public Integer getEventsRolledOver() {
return null;
Use of org.apache.nifi.provenance.serialization.StorageSummary in the Apache NiFi project.
Class TestPartitionedWriteAheadEventStore, method testSingleWriteThenRead.
/**
 * Writes a single event to a PartitionedWriteAheadEventStore and checks that it can be
 * read back by the id reported in the returned StorageSummary.
 *
 * NOTE(review): this excerpt appears to be missing the method's closing brace and possibly
 * other lines (e.g. store initialization) — confirm against the full source.
 *
 * @throws IOException declared by the test signature
 */
public void testSingleWriteThenRead() throws IOException {
final PartitionedWriteAheadEventStore store = new PartitionedWriteAheadEventStore(createConfig(), writerFactory, readerFactory, EventReporter.NO_OP, new EventFileManager());
// Before anything is added the store is expected to report a max event id of -1.
assertEquals(-1, store.getMaxEventId());
final ProvenanceEventRecord event1 = createEvent();
final StorageResult result = store.addEvents(Collections.singleton(event1));
// Only one event was added, so take the single StorageSummary from the result.
final StorageSummary summary = result.getStorageLocations().values().iterator().next();
final long eventId = summary.getEventId();
// Build the expected record by attaching the assigned id to the original event.
final ProvenanceEventRecord eventWithId = addId(event1, eventId);
// After one write the max event id is expected to be 0.
assertEquals(0, store.getMaxEventId());
// Read the event back by id and compare it with the expected record.
final ProvenanceEventRecord read = store.getEvent(eventId).get();
assertEquals(eventWithId, read);
Use of org.apache.nifi.provenance.serialization.StorageSummary in the Apache NiFi project.
Class TestWriteAheadStorePartition, method testReindex.
/**
 * Builds a WriteAheadStorePartition with a mocked EventIndex and asserts that the reindexed
 * events are exactly the 82 events with ids 18 through 99, in ascending order.
 *
 * NOTE(review): this excerpt looks truncated — the loop body, the body of the Answer that
 * would populate reindexedEvents, and the call that triggers the reindex are not visible.
 * Confirm against the full source.
 *
 * @throws IOException declared by the test signature
 */
public void testReindex() throws IOException {
final RepositoryConfiguration repoConfig = createConfig(1, "testReindex");
// Take the first partition name and storage directory from the configuration.
final String partitionName = repoConfig.getStorageDirectories().keySet().iterator().next();
final File storageDirectory = repoConfig.getStorageDirectories().values().iterator().next();
// Writer factory: a TOC writer is only created when createToc is true; otherwise null is passed through.
final RecordWriterFactory recordWriterFactory = (file, idGenerator, compressed, createToc) -> {
final TocWriter tocWriter = createToc ? new StandardTocWriter(TocUtil.getTocFile(file), false, false) : null;
// NOTE(review): 32 * 1024 is presumably a size in bytes — confirm against the EventIdFirstSchemaRecordWriter constructor.
return new EventIdFirstSchemaRecordWriter(file, idGenerator, tocWriter, compressed, 32 * 1024, IdentifierLookup.EMPTY);
final RecordReaderFactory recordReaderFactory = (file, logs, maxChars) -> RecordReaders.newRecordReader(file, logs, maxChars);
final WriteAheadStorePartition partition = new WriteAheadStorePartition(storageDirectory, partitionName, repoConfig, recordWriterFactory, recordReaderFactory, new LinkedBlockingQueue<>(), new AtomicLong(0L), EventReporter.NO_OP);
// NOTE(review): loop body not visible; presumably adds 100 events to the partition — TODO confirm.
for (int i = 0; i < 100; i++) {
// Collects the events reported for reindexing so they can be asserted on below.
final Map<ProvenanceEventRecord, StorageSummary> reindexedEvents = new ConcurrentHashMap<>();
final EventIndex eventIndex = Mockito.mock(EventIndex.class);
// Stub a void method on the mock; the stubbed method itself is not visible in this excerpt.
Mockito.doAnswer(new Answer<Object>() {
public Object answer(final InvocationOnMock invocation) throws Throwable {
// First invocation argument is read as the map of events to reindex.
final Map<ProvenanceEventRecord, StorageSummary> events = invocation.getArgumentAt(0, Map.class);
return null;
// Extract the event ids of everything that was reindexed, sorted ascending.
final List<Long> eventIdsReindexed = reindexedEvents.values().stream().map(StorageSummary::getEventId).sorted().collect(Collectors.toList());
// Expect 82 events with consecutive ids starting at 18.
assertEquals(82, eventIdsReindexed.size());
for (int i = 0; i < eventIdsReindexed.size(); i++) {
assertEquals(18 + i, eventIdsReindexed.get(i).intValue());
Use of org.apache.nifi.provenance.serialization.StorageSummary in the Apache NiFi project.
Class EventIdFirstSchemaRecordWriter, method writeRecord.
/**
 * Writes a provenance event and returns a StorageSummary describing where it was stored.
 * The record is first serialized into a checked-out buffer; the event id and the byte
 * positions are then determined inside a block synchronized on this writer.
 *
 * NOTE(review): this excerpt looks truncated — closing braces, the statements that copy the
 * serialized bytes to the output stream, and the cleanup that returns the buffer to
 * streamCache are not visible. Confirm against the full source.
 *
 * @param record the event to write
 * @return a summary holding the event id, storage location, block index, serialized length and end offset
 * @throws IOException if this writer is already marked dirty, or if the write fails
 */
public StorageSummary writeRecord(final ProvenanceEventRecord record) throws IOException {
// Refuse to write once an earlier write has failed.
if (isDirty()) {
throw new IOException("Cannot update Provenance Repository because this Record Writer has already failed to write to the Repository");
// Timestamps (nanoTime) and byte offsets captured along the way for the returned summary and the debug statistics.
final long lockStart;
final long writeStart;
final long startBytes;
final long endBytes;
final long recordIdentifier;
final long serializeStart = System.nanoTime();
// Borrow a buffer and serialize the record into it before entering the synchronized block.
final ByteArrayDataOutputStream bados = streamCache.checkOut();
try {
writeRecord(record, 0L, bados.getDataOutputStream());
lockStart = System.nanoTime();
synchronized (this) {
writeStart = System.nanoTime();
try {
// Keep the record's own event id unless it is -1, in which case take the next id from the generator.
recordIdentifier = record.getEventId() == -1L ? getIdGenerator().getAndIncrement() : record.getEventId();
startBytes = getBytesWritten();
ensureStreamState(recordIdentifier, startBytes);
final DataOutputStream out = getBufferedOutputStream();
// Offset of this record's id relative to firstEventId.
final int recordIdOffset = (int) (recordIdentifier - firstEventId);
final ByteArrayOutputStream baos = bados.getByteArrayOutputStream();
endBytes = getBytesWritten();
} catch (final IOException ioe) {
throw ioe;
} finally {
if (logger.isDebugEnabled()) {
// Collect stats and periodically dump them if log level is set to at least debug.
final long writeNanos = System.nanoTime() - writeStart;
writeTimes.add(new TimestampedLong(writeNanos));
final long serializeNanos = lockStart - serializeStart;
serializeTimes.add(new TimestampedLong(serializeNanos));
final long lockNanos = writeStart - lockStart;
lockTimes.add(new TimestampedLong(lockNanos));
bytesWritten.add(new TimestampedLong(endBytes - startBytes));
final long recordCount = totalRecordCount.incrementAndGet();
// Only log the aggregates once every 1,000,000 records.
if (recordCount % 1_000_000 == 0) {
final long sixtySecondsAgo = System.currentTimeMillis() - 60000L;
final Long writeNanosLast60 = writeTimes.getAggregateValue(sixtySecondsAgo).getValue();
final Long lockNanosLast60 = lockTimes.getAggregateValue(sixtySecondsAgo).getValue();
final Long serializeNanosLast60 = serializeTimes.getAggregateValue(sixtySecondsAgo).getValue();
final Long bytesWrittenLast60 = bytesWritten.getAggregateValue(sixtySecondsAgo).getValue();
logger.debug("In the last 60 seconds, have spent {} millis writing to file ({} MB), {} millis waiting on synchronize block, {} millis serializing events", TimeUnit.NANOSECONDS.toMillis(writeNanosLast60), bytesWrittenLast60 / 1024 / 1024, TimeUnit.NANOSECONDS.toMillis(lockNanosLast60), TimeUnit.NANOSECONDS.toMillis(serializeNanosLast60));
// Number of bytes this record occupies: the difference between the byte counts after and before the write.
final long serializedLength = endBytes - startBytes;
final TocWriter tocWriter = getTocWriter();
// No TOC writer means there is no block index to report.
final Integer blockIndex = tocWriter == null ? null : tocWriter.getCurrentBlockIndex();
final File file = getFile();
// Storage location is "<parent directory name>/<file name>".
final String storageLocation = file.getParentFile().getName() + "/" + file.getName();
return new StorageSummary(recordIdentifier, storageLocation, blockIndex, serializedLength, endBytes);