
Example 6 with TocWriter

Use of org.apache.nifi.provenance.toc.TocWriter in project nifi by apache.

From the class TestSchemaRecordReaderWriter, method testFieldRemovedFromSchema.

@Test
public void testFieldRemovedFromSchema() throws IOException {
    final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);
    try {
        // Create a schema that has the fields modified
        final RecordSchema schemaV1 = ProvenanceEventSchema.PROVENANCE_EVENT_SCHEMA_V1;
        final List<RecordField> fields = new ArrayList<>(schemaV1.getFields());
        fields.remove(new SimpleRecordField(EventFieldNames.UPDATED_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        fields.remove(new SimpleRecordField(EventFieldNames.PREVIOUS_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        final RecordSchema recordSchema = new RecordSchema(fields);
        // Create a record writer whose schema does not contain updated attributes or previous attributes.
        // This means that we must also override the method that writes out attributes so that we are able
        // to avoid actually writing them out.
        final ByteArraySchemaRecordWriter writer = new ByteArraySchemaRecordWriter(journalFile, idGenerator, tocWriter, false, 0) {

            @Override
            public void writeHeader(long firstEventId, DataOutputStream out) throws IOException {
                final ByteArrayOutputStream baos = new ByteArrayOutputStream();
                recordSchema.writeTo(baos);
                out.writeInt(baos.size());
                baos.writeTo(out);
            }

            @Override
            protected Record createRecord(final ProvenanceEventRecord event, final long eventId) {
                final RecordSchema contentClaimSchema = new RecordSchema(recordSchema.getField(EventFieldNames.CONTENT_CLAIM).getSubFields());
                return new EventRecord(event, eventId, recordSchema, contentClaimSchema);
            }
        };
        try {
            writer.writeHeader(1L);
            writer.writeRecord(createEvent());
            writer.writeRecord(createEvent());
        } finally {
            writer.close();
        }
    } finally {
        tocWriter.close();
    }
    // Read the records in and make sure that they have the info that we expect.
    try (final InputStream in = new FileInputStream(journalFile);
        final TocReader tocReader = new StandardTocReader(tocFile);
        final RecordReader reader = createReader(in, journalFile.getName(), tocReader, 10000)) {
        for (int i = 0; i < 2; i++) {
            final StandardProvenanceEventRecord event = reader.nextRecord();
            assertNotNull(event);
            assertEquals(ProvenanceEventType.RECEIVE, event.getEventType());
            // We will still have a Map<String, String> for updated attributes because the
            // Provenance Event Builder will create an empty map.
            assertNotNull(event.getUpdatedAttributes());
            assertTrue(event.getUpdatedAttributes().isEmpty());
        }
    }
}
Also used : TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordField(org.apache.nifi.repository.schema.RecordField) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) DataOutputStream(java.io.DataOutputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) NopTocWriter(org.apache.nifi.provenance.toc.NopTocWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) EventRecord(org.apache.nifi.provenance.schema.EventRecord) RecordSchema(org.apache.nifi.repository.schema.RecordSchema) Test(org.junit.Test)
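
The overridden writeHeader above length-prefixes the serialized schema: it writes the byte count with out.writeInt(baos.size()) and then the schema bytes themselves. As a minimal sketch (an addition for illustration, not part of the original test), the read side of such a frame can be consumed with plain java.io.DataInputStream before the bytes are handed to whatever schema parser the reader uses; the variable names are illustrative:

    // Consume the length-prefixed schema header produced by the overridden writeHeader.
    final DataInputStream headerIn = new DataInputStream(in);
    final int schemaLength = headerIn.readInt();       // value written by out.writeInt(baos.size())
    final byte[] schemaBytes = new byte[schemaLength];
    headerIn.readFully(schemaBytes);                   // bytes produced by recordSchema.writeTo(baos)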

Example 7 with TocWriter

Use of org.apache.nifi.provenance.toc.TocWriter in project nifi by apache.

From the class TestSchemaRecordReaderWriter, method testWritePerformance.

@Test
@Ignore("For local testing only")
public void testWritePerformance() throws IOException {
    // This is a simple micro-benchmarking test so that we can determine how fast the serialization/deserialization is before
    // making significant changes. This allows us to ensure that changes that we make do not have significant adverse effects
    // on performance of the repository.
    final ProvenanceEventRecord event = createEvent();
    final TocWriter tocWriter = new NopTocWriter();
    final int numEvents = 10_000_000;
    final long startNanos = System.nanoTime();
    try (final OutputStream nullOut = new NullOutputStream();
        final RecordWriter writer = new ByteArraySchemaRecordWriter(nullOut, "out", idGenerator, tocWriter, false, 0)) {
        writer.writeHeader(0L);
        for (int i = 0; i < numEvents; i++) {
            writer.writeRecord(event);
        }
    }
    final long nanos = System.nanoTime() - startNanos;
    final long millis = TimeUnit.NANOSECONDS.toMillis(nanos);
    System.out.println("Took " + millis + " millis to write " + numEvents + " events");
}
Also used : RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) NopTocWriter(org.apache.nifi.provenance.toc.NopTocWriter) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataOutputStream(java.io.DataOutputStream) OutputStream(java.io.OutputStream) NullOutputStream(org.apache.nifi.stream.io.NullOutputStream) Ignore(org.junit.Ignore) Test(org.junit.Test)
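
Since the test prints only the elapsed milliseconds, a small follow-up calculation can be appended at the end of the method to express the result as throughput, which is usually the number worth comparing across changes (a hedged addition, not part of the original test):

    // Derive events per second from the measured wall-clock time; guard against a zero-millisecond result.
    final double seconds = Math.max(millis, 1L) / 1000.0;
    System.out.println(String.format("Throughput: %,.0f events/sec", numEvents / seconds));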

Example 8 with TocWriter

Use of org.apache.nifi.provenance.toc.TocWriter in project nifi by apache.

From the class TestPartitionedWriteAheadEventStore, method testPerformanceOfAccessingEvents.

@Test
@Ignore
public void testPerformanceOfAccessingEvents() throws Exception {
    final RecordWriterFactory recordWriterFactory = (file, idGenerator, compressed, createToc) -> {
        final TocWriter tocWriter = createToc ? new StandardTocWriter(TocUtil.getTocFile(file), false, false) : null;
        return new EventIdFirstSchemaRecordWriter(file, idGenerator, tocWriter, compressed, 1024 * 1024, IdentifierLookup.EMPTY);
    };
    final RecordReaderFactory recordReaderFactory = (file, logs, maxChars) -> RecordReaders.newRecordReader(file, logs, maxChars);
    final PartitionedWriteAheadEventStore store = new PartitionedWriteAheadEventStore(createConfig(), recordWriterFactory, recordReaderFactory, EventReporter.NO_OP, new EventFileManager());
    store.initialize();
    assertEquals(-1, store.getMaxEventId());
    for (int i = 0; i < 100_000; i++) {
        final ProvenanceEventRecord event1 = createEvent();
        store.addEvents(Collections.singleton(event1));
    }
    final List<Long> eventIdList = Arrays.asList(4L, 80L, 1024L, 40_000L, 80_000L, 99_000L);
    while (true) {
        for (int i = 0; i < 100; i++) {
            time(() -> store.getEvents(eventIdList, EventAuthorizer.GRANT_ALL, EventTransformer.EMPTY_TRANSFORMER), "Fetch Events");
        }
        Thread.sleep(1000L);
    }
}
Also used : Arrays(java.util.Arrays) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) AccessDeniedException(org.apache.nifi.authorization.AccessDeniedException) IdentifierLookup(org.apache.nifi.provenance.IdentifierLookup) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) RecordReaders(org.apache.nifi.provenance.serialization.RecordReaders) ArrayList(java.util.ArrayList) TocWriter(org.apache.nifi.provenance.toc.TocWriter) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) TestName(org.junit.rules.TestName) Map(java.util.Map) TocUtil(org.apache.nifi.provenance.toc.TocUtil) Before(org.junit.Before) EventTransformer(org.apache.nifi.provenance.authorization.EventTransformer) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) Assert.assertNotNull(org.junit.Assert.assertNotNull) ProvenanceEventType(org.apache.nifi.provenance.ProvenanceEventType) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Test(org.junit.Test) UUID(java.util.UUID) File(java.io.File) StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) TimeUnit(java.util.concurrent.TimeUnit) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) RecordWriters(org.apache.nifi.provenance.serialization.RecordWriters) Rule(org.junit.Rule) EventReporter(org.apache.nifi.events.EventReporter) Ignore(org.junit.Ignore) RepositoryConfiguration(org.apache.nifi.provenance.RepositoryConfiguration) Assert.assertFalse(org.junit.Assert.assertFalse) Collections(java.util.Collections) EventIdFirstSchemaRecordWriter(org.apache.nifi.provenance.EventIdFirstSchemaRecordWriter) Assert.assertEquals(org.junit.Assert.assertEquals) EventAuthorizer(org.apache.nifi.provenance.authorization.EventAuthorizer)
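
The time(...) helper invoked in the loop is not shown in this snippet. A plausible shape for it, assuming it simply runs the task once, measures it with System.nanoTime(), and prints the label (the actual helper in TestPartitionedWriteAheadEventStore may differ), is:

    // Hypothetical timing helper: runs the task once and prints how long it took.
    private void time(final Callable<?> task, final String label) throws Exception {
        final long start = System.nanoTime();
        task.call();
        final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
        System.out.println(label + " took " + millis + " millis");
    }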

Example 9 with TocWriter

Use of org.apache.nifi.provenance.toc.TocWriter in project nifi by apache.

From the class TestWriteAheadStorePartition, method testReindex.

@Test
@SuppressWarnings("unchecked")
public void testReindex() throws IOException {
    final RepositoryConfiguration repoConfig = createConfig(1, "testReindex");
    repoConfig.setMaxEventFileCount(5);
    final String partitionName = repoConfig.getStorageDirectories().keySet().iterator().next();
    final File storageDirectory = repoConfig.getStorageDirectories().values().iterator().next();
    final RecordWriterFactory recordWriterFactory = (file, idGenerator, compressed, createToc) -> {
        final TocWriter tocWriter = createToc ? new StandardTocWriter(TocUtil.getTocFile(file), false, false) : null;
        return new EventIdFirstSchemaRecordWriter(file, idGenerator, tocWriter, compressed, 32 * 1024, IdentifierLookup.EMPTY);
    };
    final RecordReaderFactory recordReaderFactory = (file, logs, maxChars) -> RecordReaders.newRecordReader(file, logs, maxChars);
    final WriteAheadStorePartition partition = new WriteAheadStorePartition(storageDirectory, partitionName, repoConfig, recordWriterFactory, recordReaderFactory, new LinkedBlockingQueue<>(), new AtomicLong(0L), EventReporter.NO_OP);
    for (int i = 0; i < 100; i++) {
        partition.addEvents(Collections.singleton(TestUtil.createEvent()));
    }
    final Map<ProvenanceEventRecord, StorageSummary> reindexedEvents = new ConcurrentHashMap<>();
    final EventIndex eventIndex = Mockito.mock(EventIndex.class);
    Mockito.doAnswer(new Answer<Object>() {

        @Override
        public Object answer(final InvocationOnMock invocation) throws Throwable {
            final Map<ProvenanceEventRecord, StorageSummary> events = invocation.getArgumentAt(0, Map.class);
            reindexedEvents.putAll(events);
            return null;
        }
    }).when(eventIndex).reindexEvents(Mockito.anyMap());
    Mockito.doReturn(18L).when(eventIndex).getMinimumEventIdToReindex("1");
    partition.reindexLatestEvents(eventIndex);
    final List<Long> eventIdsReindexed = reindexedEvents.values().stream().map(StorageSummary::getEventId).sorted().collect(Collectors.toList());
    assertEquals(82, eventIdsReindexed.size());
    for (int i = 0; i < eventIdsReindexed.size(); i++) {
        assertEquals(18 + i, eventIdsReindexed.get(i).intValue());
    }
}
Also used : StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) IdentifierLookup(org.apache.nifi.provenance.IdentifierLookup) RecordReaders(org.apache.nifi.provenance.serialization.RecordReaders) TocWriter(org.apache.nifi.provenance.toc.TocWriter) Answer(org.mockito.stubbing.Answer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) Map(java.util.Map) TocUtil(org.apache.nifi.provenance.toc.TocUtil) EventIndex(org.apache.nifi.provenance.index.EventIndex) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) IOException(java.io.IOException) Test(org.junit.Test) UUID(java.util.UUID) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) File(java.io.File) AtomicLong(java.util.concurrent.atomic.AtomicLong) Mockito(org.mockito.Mockito) List(java.util.List) EventReporter(org.apache.nifi.events.EventReporter) RepositoryConfiguration(org.apache.nifi.provenance.RepositoryConfiguration) TestUtil(org.apache.nifi.provenance.TestUtil) Collections(java.util.Collections) EventIdFirstSchemaRecordWriter(org.apache.nifi.provenance.EventIdFirstSchemaRecordWriter) Assert.assertEquals(org.junit.Assert.assertEquals)
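
The expected count of 82 follows from the setup: 100 events are written (ids 0 through 99) and the mocked index reports 18 as the minimum event id to reindex, so ids 18 through 99 should be handed back to reindexEvents. As a side note, the anonymous Answer uses the Mockito 1.x getArgumentAt API; on Mockito 2 or later (an assumption, not what this test targets) the same stub could be written as a lambda:

    // Equivalent stub with Mockito 2+, capturing the events passed to EventIndex.reindexEvents.
    Mockito.doAnswer(invocation -> {
        final Map<ProvenanceEventRecord, StorageSummary> events = invocation.getArgument(0);
        reindexedEvents.putAll(events);
        return null;
    }).when(eventIndex).reindexEvents(Mockito.anyMap());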

Example 10 with TocWriter

Use of org.apache.nifi.provenance.toc.TocWriter in project nifi by apache.

From the class EventIdFirstSchemaRecordWriter, method writeRecord.

@Override
public StorageSummary writeRecord(final ProvenanceEventRecord record) throws IOException {
    if (isDirty()) {
        throw new IOException("Cannot update Provenance Repository because this Record Writer has already failed to write to the Repository");
    }
    final long lockStart;
    final long writeStart;
    final long startBytes;
    final long endBytes;
    final long recordIdentifier;
    final long serializeStart = System.nanoTime();
    final ByteArrayDataOutputStream bados = streamCache.checkOut();
    try {
        writeRecord(record, 0L, bados.getDataOutputStream());
        lockStart = System.nanoTime();
        synchronized (this) {
            writeStart = System.nanoTime();
            try {
                recordIdentifier = record.getEventId() == -1L ? getIdGenerator().getAndIncrement() : record.getEventId();
                startBytes = getBytesWritten();
                ensureStreamState(recordIdentifier, startBytes);
                final DataOutputStream out = getBufferedOutputStream();
                final int recordIdOffset = (int) (recordIdentifier - firstEventId);
                out.writeInt(recordIdOffset);
                final ByteArrayOutputStream baos = bados.getByteArrayOutputStream();
                out.writeInt(baos.size());
                baos.writeTo(out);
                recordCount.incrementAndGet();
                endBytes = getBytesWritten();
            } catch (final IOException ioe) {
                markDirty();
                throw ioe;
            }
        }
    } finally {
        streamCache.checkIn(bados);
    }
    if (logger.isDebugEnabled()) {
        // Collect stats and periodically dump them if the log level is set to at least debug.
        final long writeNanos = System.nanoTime() - writeStart;
        writeTimes.add(new TimestampedLong(writeNanos));
        final long serializeNanos = lockStart - serializeStart;
        serializeTimes.add(new TimestampedLong(serializeNanos));
        final long lockNanos = writeStart - lockStart;
        lockTimes.add(new TimestampedLong(lockNanos));
        bytesWritten.add(new TimestampedLong(endBytes - startBytes));
        final long recordCount = totalRecordCount.incrementAndGet();
        if (recordCount % 1_000_000 == 0) {
            final long sixtySecondsAgo = System.currentTimeMillis() - 60000L;
            final Long writeNanosLast60 = writeTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long lockNanosLast60 = lockTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long serializeNanosLast60 = serializeTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long bytesWrittenLast60 = bytesWritten.getAggregateValue(sixtySecondsAgo).getValue();
            logger.debug("In the last 60 seconds, have spent {} millis writing to file ({} MB), {} millis waiting on synchronize block, {} millis serializing events", TimeUnit.NANOSECONDS.toMillis(writeNanosLast60), bytesWrittenLast60 / 1024 / 1024, TimeUnit.NANOSECONDS.toMillis(lockNanosLast60), TimeUnit.NANOSECONDS.toMillis(serializeNanosLast60));
        }
    }
    final long serializedLength = endBytes - startBytes;
    final TocWriter tocWriter = getTocWriter();
    final Integer blockIndex = tocWriter == null ? null : tocWriter.getCurrentBlockIndex();
    final File file = getFile();
    final String storageLocation = file.getParentFile().getName() + "/" + file.getName();
    return new StorageSummary(recordIdentifier, storageLocation, blockIndex, serializedLength, endBytes);
}
Also used : DataOutputStream(java.io.DataOutputStream) ByteArrayDataOutputStream(org.apache.nifi.provenance.util.ByteArrayDataOutputStream) IOException(java.io.IOException) ByteArrayOutputStream(java.io.ByteArrayOutputStream) TimestampedLong(org.apache.nifi.util.timebuffer.TimestampedLong) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) TocWriter(org.apache.nifi.provenance.toc.TocWriter) AtomicLong(java.util.concurrent.atomic.AtomicLong) File(java.io.File)
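
From the caller's side, the returned StorageSummary is what ties the event back to its place in the repository. A minimal usage sketch, assuming a writer constructed as in Examples 8 and 9 and an event from createEvent() (only getEventId() is shown, since it is the accessor used elsewhere in these examples):

    // Write one event and read back the id assigned by the writer's id generator.
    final StorageSummary summary = writer.writeRecord(event);
    final long assignedEventId = summary.getEventId();
    System.out.println("Stored event " + assignedEventId);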

Aggregations

TocWriter (org.apache.nifi.provenance.toc.TocWriter): 23 usages
StandardTocWriter (org.apache.nifi.provenance.toc.StandardTocWriter): 18 usages
File (java.io.File): 16 usages
Test (org.junit.Test): 16 usages
RecordWriter (org.apache.nifi.provenance.serialization.RecordWriter): 13 usages
StandardTocReader (org.apache.nifi.provenance.toc.StandardTocReader): 12 usages
TocReader (org.apache.nifi.provenance.toc.TocReader): 12 usages
FileInputStream (java.io.FileInputStream): 10 usages
RecordReader (org.apache.nifi.provenance.serialization.RecordReader): 10 usages
IOException (java.io.IOException): 8 usages
HashMap (java.util.HashMap): 8 usages
DataOutputStream (java.io.DataOutputStream): 7 usages
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 6 usages
ArrayList (java.util.ArrayList): 4 usages
EventReporter (org.apache.nifi.events.EventReporter): 4 usages
RecordReaders (org.apache.nifi.provenance.serialization.RecordReaders): 4 usages
StorageSummary (org.apache.nifi.provenance.serialization.StorageSummary): 4 usages
NopTocWriter (org.apache.nifi.provenance.toc.NopTocWriter): 4 usages
TocUtil (org.apache.nifi.provenance.toc.TocUtil): 4 usages
OutputStream (java.io.OutputStream): 3 usages