Search in sources :

Example 1 with StorageSummary

use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.

the class TestEventIndexTask method testIndexWriterCommittedWhenAppropriate.

/**
 * Verifies when the EventIndexTask commits its index writer: the test expects no commit
 * while up to 200 documents have been indexed, exactly one commit once the 201st document
 * arrives, and no further commit for the two documents queued after that.
 */
@Test(timeout = 5000)
public void testIndexWriterCommittedWhenAppropriate() throws IOException, InterruptedException {
    // Repository with a single storage directory registered under the name "1".
    final BlockingQueue<StoredDocument> pending = new LinkedBlockingQueue<>();
    final RepositoryConfiguration config = new RepositoryConfiguration();
    final File storageDirectory = new File("target/storage/TestEventIndexTask/1");
    config.addStorageDirectory("1", storageDirectory);
    final AtomicInteger commits = new AtomicInteger(0);

    // The Lucene IndexWriter is a mock; it is wrapped in a LuceneEventIndexWriter whose
    // getEventsIndexed() is polled further down. The mocked IndexManager always lends out that wrapper.
    final IndexWriter luceneWriter = Mockito.mock(IndexWriter.class);
    final EventIndexWriter writer = new LuceneEventIndexWriter(luceneWriter, storageDirectory);
    final IndexManager manager = Mockito.mock(IndexManager.class);
    Mockito.when(manager.borrowIndexWriter(Mockito.any(File.class))).thenReturn(writer);
    final IndexDirectoryManager directories = new IndexDirectoryManager(config);

    // Subclass the task so that commit(EventIndexWriter) only bumps a counter.
    // NOTE(review): 201 is presumably the number of events per commit - confirm against EventIndexTask.
    final EventIndexTask indexTask = new EventIndexTask(pending, config, manager, directories, 201, EventReporter.NO_OP) {

        @Override
        protected void commit(EventIndexWriter writerToCommit) throws IOException {
            commits.incrementAndGet();
        }
    };

    // Run the same task instance on 4 daemon threads.
    int startedThreads = 0;
    while (startedThreads < 4) {
        final Thread worker = new Thread(indexTask);
        worker.setDaemon(true);
        worker.start();
        startedThreads++;
    }
    assertEquals(0, commits.get());

    // Queue two batches of 100 documents, each with a storage filename of "0.0.prov".
    // Right after the first batch has been queued, nothing may have been committed yet.
    for (int batch = 0; batch < 2; batch++) {
        for (int n = 0; n < 100; n++) {
            final Document doc = new Document();
            doc.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), System.currentTimeMillis(), Store.NO));
            pending.add(new StoredDocument(doc, new StorageSummary(1L, "0.0.prov", "1", 0, 1000L, 1000L)));
        }
        if (batch == 0) {
            assertEquals(0, commits.get());
        }
    }

    // Block until all 200 events have been indexed.
    while (writer.getEventsIndexed() < 200) {
        Thread.sleep(10L);
    }

    // Give the workers a little more time; there must still be no commit.
    Thread.sleep(100L);
    assertEquals(0, commits.get());

    // Queue document number 201 and wait for the first commit to show up.
    final Document extraDoc = new Document();
    extraDoc.add(new LongField(SearchableFields.EventTime.getSearchableFieldName(), System.currentTimeMillis(), Store.NO));
    final StorageSummary extraLocation = new StorageSummary(1L, "0.0.prov", "1", 0, 1000L, 1000L);
    pending.add(new StoredDocument(extraDoc, extraLocation));
    while (commits.get() == 0) {
        Thread.sleep(10L);
    }
    assertEquals(1, commits.get());

    // Queue two more documents, one at a time (reusing the same Document and location).
    // After each of them the commit count is expected to remain at 1.
    for (int extra = 0; extra < 2; extra++) {
        pending.add(new StoredDocument(extraDoc, extraLocation));
        Thread.sleep(100L);
        assertEquals(1, commits.get());
    }
}
Also used : LuceneEventIndexWriter(org.apache.nifi.provenance.lucene.LuceneEventIndexWriter) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Document(org.apache.lucene.document.Document) IndexManager(org.apache.nifi.provenance.lucene.IndexManager) LongField(org.apache.lucene.document.LongField) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) LuceneEventIndexWriter(org.apache.nifi.provenance.lucene.LuceneEventIndexWriter) IndexWriter(org.apache.lucene.index.IndexWriter) EventIndexWriter(org.apache.nifi.provenance.index.EventIndexWriter) LuceneEventIndexWriter(org.apache.nifi.provenance.lucene.LuceneEventIndexWriter) EventIndexWriter(org.apache.nifi.provenance.index.EventIndexWriter) RepositoryConfiguration(org.apache.nifi.provenance.RepositoryConfiguration) File(java.io.File) Test(org.junit.Test)

Example 2 with StorageSummary

use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.

the class ArrayListEventStore method addEvents.

/**
 * Appends every given event to the in-memory event list and hands back a StorageResult
 * mapping each event to a freshly created StorageSummary.
 *
 * @param events the events to store
 * @return a result whose storage locations contain one entry per stored event; it never
 *         reports a rollover and returns {@code null} for the number of events rolled over
 */
@Override
public synchronized StorageResult addEvents(Iterable<ProvenanceEventRecord> events) {
    final Map<ProvenanceEventRecord, StorageSummary> summariesByEvent = new HashMap<>();

    for (final ProvenanceEventRecord record : events) {
        this.events.add(record);
        // Each stored event consumes the next id from the generator; the remaining
        // summary fields are fixed placeholder values.
        summariesByEvent.put(record, new StorageSummary(idGenerator.getAndIncrement(), "location", "1", 1, 0L, 0L));
    }

    return new StorageResult() {

        @Override
        public Map<ProvenanceEventRecord, StorageSummary> getStorageLocations() {
            return summariesByEvent;
        }

        @Override
        public boolean triggeredRollover() {
            // This store never rolls over.
            return false;
        }

        @Override
        public Integer getEventsRolledOver() {
            return null;
        }
    };
}
Also used : StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) HashMap(java.util.HashMap) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord)

Example 3 with StorageSummary

use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.

the class TestPartitionedWriteAheadEventStore method testSingleWriteThenRead.

/**
 * Writes one event to a freshly initialized store and checks that the same event,
 * carrying the id assigned by the store, can be read back.
 */
@Test
public void testSingleWriteThenRead() throws IOException {
    final PartitionedWriteAheadEventStore eventStore = new PartitionedWriteAheadEventStore(createConfig(), writerFactory, readerFactory, EventReporter.NO_OP, new EventFileManager());
    eventStore.initialize();

    // Before anything has been written, the max event id is expected to be -1.
    assertEquals(-1, eventStore.getMaxEventId());

    final ProvenanceEventRecord written = createEvent();
    final StorageResult writeResult = eventStore.addEvents(Collections.singleton(written));

    // Exactly one event was written, so the first storage location holds its assigned id.
    final long assignedId = writeResult.getStorageLocations().values().iterator().next().getEventId();
    final ProvenanceEventRecord expected = addId(written, assignedId);

    // After the first write the max event id is expected to be 0.
    assertEquals(0, eventStore.getMaxEventId());

    final ProvenanceEventRecord actual = eventStore.getEvent(assignedId).get();
    assertEquals(expected, actual);
}
Also used : StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) Test(org.junit.Test)

Example 4 with StorageSummary

use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.

the class TestWriteAheadStorePartition method testReindex.

/**
 * Writes 100 events to a partition, asks it to re-index its latest events against a mocked
 * EventIndex that reports 18 as the minimum event id to re-index, and verifies that exactly
 * the events with ids 18 through 99 were handed to the index.
 */
@Test
@SuppressWarnings("unchecked")
public void testReindex() throws IOException {
    final RepositoryConfiguration config = createConfig(1, "testReindex");
    config.setMaxEventFileCount(5);

    // Use the first configured storage directory and its name as the partition under test.
    final String partitionName = config.getStorageDirectories().keySet().iterator().next();
    final File partitionDirectory = config.getStorageDirectories().values().iterator().next();

    final RecordWriterFactory writerFactory = (eventFile, ids, compress, withToc) -> {
        // A TOC writer is only created when the caller asks for one.
        TocWriter toc = null;
        if (withToc) {
            toc = new StandardTocWriter(TocUtil.getTocFile(eventFile), false, false);
        }
        return new EventIdFirstSchemaRecordWriter(eventFile, ids, toc, compress, 32 * 1024, IdentifierLookup.EMPTY);
    };
    final RecordReaderFactory readerFactory = (eventFile, logFiles, maxAttributeChars) -> RecordReaders.newRecordReader(eventFile, logFiles, maxAttributeChars);

    final WriteAheadStorePartition partition = new WriteAheadStorePartition(partitionDirectory, partitionName, config, writerFactory, readerFactory, new LinkedBlockingQueue<>(), new AtomicLong(0L), EventReporter.NO_OP);

    // Write 100 events, one per call.
    for (int written = 0; written < 100; written++) {
        partition.addEvents(Collections.singleton(TestUtil.createEvent()));
    }

    // Capture everything that is passed to EventIndex.reindexEvents(...).
    final Map<ProvenanceEventRecord, StorageSummary> captured = new ConcurrentHashMap<>();
    final EventIndex index = Mockito.mock(EventIndex.class);
    Mockito.doAnswer(invocation -> {
        final Map<ProvenanceEventRecord, StorageSummary> batch = invocation.getArgumentAt(0, Map.class);
        captured.putAll(batch);
        return null;
    }).when(index).reindexEvents(Mockito.anyMap());
    Mockito.doReturn(18L).when(index).getMinimumEventIdToReindex("1");

    partition.reindexLatestEvents(index);

    final List<Long> reindexedIds = captured.values().stream()
        .map(StorageSummary::getEventId)
        .sorted()
        .collect(Collectors.toList());

    // 82 events are expected, with consecutive ids starting at 18.
    assertEquals(82, reindexedIds.size());
    int expectedId = 18;
    for (final Long reindexedId : reindexedIds) {
        assertEquals(expectedId, reindexedId.intValue());
        expectedId++;
    }
}
Also used : StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) IdentifierLookup(org.apache.nifi.provenance.IdentifierLookup) RecordReaders(org.apache.nifi.provenance.serialization.RecordReaders) TocWriter(org.apache.nifi.provenance.toc.TocWriter) Answer(org.mockito.stubbing.Answer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) Map(java.util.Map) TocUtil(org.apache.nifi.provenance.toc.TocUtil) EventIndex(org.apache.nifi.provenance.index.EventIndex) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) IOException(java.io.IOException) Test(org.junit.Test) UUID(java.util.UUID) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) File(java.io.File) AtomicLong(java.util.concurrent.atomic.AtomicLong) Mockito(org.mockito.Mockito) List(java.util.List) EventReporter(org.apache.nifi.events.EventReporter) RepositoryConfiguration(org.apache.nifi.provenance.RepositoryConfiguration) TestUtil(org.apache.nifi.provenance.TestUtil) Collections(java.util.Collections) EventIdFirstSchemaRecordWriter(org.apache.nifi.provenance.EventIdFirstSchemaRecordWriter) Assert.assertEquals(org.junit.Assert.assertEquals) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) RepositoryConfiguration(org.apache.nifi.provenance.RepositoryConfiguration) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) EventIdFirstSchemaRecordWriter(org.apache.nifi.provenance.EventIdFirstSchemaRecordWriter) AtomicLong(java.util.concurrent.atomic.AtomicLong) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) InvocationOnMock(org.mockito.invocation.InvocationOnMock) 
AtomicLong(java.util.concurrent.atomic.AtomicLong) File(java.io.File) EventIndex(org.apache.nifi.provenance.index.EventIndex) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Test(org.junit.Test)

Example 5 with StorageSummary

use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.

the class EventIdFirstSchemaRecordWriter method writeRecord.

/**
 * Writes a single provenance event to this writer's output and reports where it was stored.
 * <p>
 * The event is first written into a buffer checked out from {@code streamCache}, outside of
 * the lock. Inside the {@code synchronized} block the record id is determined, the id offset
 * (relative to {@code firstEventId}) and the buffered length are written as ints, and the
 * buffered bytes are then copied to the output stream.
 *
 * @param record the event to write; if its event id is {@code -1}, the next value of the id
 *               generator is used as the record id, otherwise the event's own id is used
 * @return a StorageSummary built from the record id, a location of the form
 *         {@code <parent directory name>/<file name>}, the current TOC block index
 *         ({@code null} when there is no TocWriter), the number of bytes written for this
 *         record and the total bytes written after it
 * @throws IOException if this writer is already dirty, or if writing fails (in which case the
 *                     writer is marked dirty before the exception is rethrown)
 */
@Override
public StorageSummary writeRecord(final ProvenanceEventRecord record) throws IOException {
    // Refuse to write once an earlier write has failed.
    if (isDirty()) {
        throw new IOException("Cannot update Provenance Repository because this Record Writer has already failed to write to the Repository");
    }
    // These are assigned exactly once inside the try/synchronized blocks below and read afterwards.
    final long lockStart;
    final long writeStart;
    final long startBytes;
    final long endBytes;
    final long recordIdentifier;
    final long serializeStart = System.nanoTime();
    final ByteArrayDataOutputStream bados = streamCache.checkOut();
    try {
        // Write the record into the checked-out buffer before taking the lock.
        writeRecord(record, 0L, bados.getDataOutputStream());
        lockStart = System.nanoTime();
        synchronized (this) {
            writeStart = System.nanoTime();
            try {
                // An event id of -1 means the id is taken from the id generator.
                recordIdentifier = record.getEventId() == -1L ? getIdGenerator().getAndIncrement() : record.getEventId();
                startBytes = getBytesWritten();
                ensureStreamState(recordIdentifier, startBytes);
                final DataOutputStream out = getBufferedOutputStream();
                // The id is stored as an int offset from firstEventId.
                // NOTE(review): the long difference is narrowed to int here - confirm it cannot exceed the int range.
                final int recordIdOffset = (int) (recordIdentifier - firstEventId);
                out.writeInt(recordIdOffset);
                // Length prefix followed by the buffered record bytes.
                final ByteArrayOutputStream baos = bados.getByteArrayOutputStream();
                out.writeInt(baos.size());
                baos.writeTo(out);
                recordCount.incrementAndGet();
                endBytes = getBytesWritten();
            } catch (final IOException ioe) {
                // Mark the writer dirty so that subsequent calls fail fast, then propagate.
                markDirty();
                throw ioe;
            }
        }
    } finally {
        // Always return the buffer to the cache, whether or not the write succeeded.
        streamCache.checkIn(bados);
    }
    if (logger.isDebugEnabled()) {
        // Collect timing/size stats and periodically dump them; this only happens when debug logging is enabled.
        final long writeNanos = System.nanoTime() - writeStart;
        writeTimes.add(new TimestampedLong(writeNanos));
        final long serializeNanos = lockStart - serializeStart;
        serializeTimes.add(new TimestampedLong(serializeNanos));
        final long lockNanos = writeStart - lockStart;
        lockTimes.add(new TimestampedLong(lockNanos));
        bytesWritten.add(new TimestampedLong(endBytes - startBytes));
        // This local is a separate value from the recordCount counter incremented inside the synchronized block.
        final long recordCount = totalRecordCount.incrementAndGet();
        // Log the aggregated stats once per 1,000,000 records counted here.
        if (recordCount % 1_000_000 == 0) {
            final long sixtySecondsAgo = System.currentTimeMillis() - 60000L;
            final Long writeNanosLast60 = writeTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long lockNanosLast60 = lockTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long serializeNanosLast60 = serializeTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long bytesWrittenLast60 = bytesWritten.getAggregateValue(sixtySecondsAgo).getValue();
            logger.debug("In the last 60 seconds, have spent {} millis writing to file ({} MB), {} millis waiting on synchronize block, {} millis serializing events", TimeUnit.NANOSECONDS.toMillis(writeNanosLast60), bytesWrittenLast60 / 1024 / 1024, TimeUnit.NANOSECONDS.toMillis(lockNanosLast60), TimeUnit.NANOSECONDS.toMillis(serializeNanosLast60));
        }
    }
    // Bytes written to the output for this record (difference of getBytesWritten() before and after).
    final long serializedLength = endBytes - startBytes;
    final TocWriter tocWriter = getTocWriter();
    // No TOC writer means there is no block index to report.
    final Integer blockIndex = tocWriter == null ? null : tocWriter.getCurrentBlockIndex();
    final File file = getFile();
    // Location is "<parent directory name>/<file name>".
    final String storageLocation = file.getParentFile().getName() + "/" + file.getName();
    return new StorageSummary(recordIdentifier, storageLocation, blockIndex, serializedLength, endBytes);
}
Also used : DataOutputStream(java.io.DataOutputStream) ByteArrayDataOutputStream(org.apache.nifi.provenance.util.ByteArrayDataOutputStream) IOException(java.io.IOException) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ByteArrayDataOutputStream(org.apache.nifi.provenance.util.ByteArrayDataOutputStream) TimestampedLong(org.apache.nifi.util.timebuffer.TimestampedLong) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) TocWriter(org.apache.nifi.provenance.toc.TocWriter) TimestampedLong(org.apache.nifi.util.timebuffer.TimestampedLong) AtomicLong(java.util.concurrent.atomic.AtomicLong) File(java.io.File)

Aggregations

StorageSummary (org.apache.nifi.provenance.serialization.StorageSummary)16 IOException (java.io.IOException)10 ProvenanceEventRecord (org.apache.nifi.provenance.ProvenanceEventRecord)9 File (java.io.File)6 StandardProvenanceEventRecord (org.apache.nifi.provenance.StandardProvenanceEventRecord)6 Test (org.junit.Test)5 EOFException (java.io.EOFException)4 ExecutionException (java.util.concurrent.ExecutionException)4 RepositoryConfiguration (org.apache.nifi.provenance.RepositoryConfiguration)4 HashMap (java.util.HashMap)3 AtomicLong (java.util.concurrent.atomic.AtomicLong)3 IndexManager (org.apache.nifi.provenance.lucene.IndexManager)3 TocWriter (org.apache.nifi.provenance.toc.TocWriter)3 ByteArrayOutputStream (java.io.ByteArrayOutputStream)2 DataOutputStream (java.io.DataOutputStream)2 FileNotFoundException (java.io.FileNotFoundException)2 ArrayList (java.util.ArrayList)2 Map (java.util.Map)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue)2