Search in sources :

Example 6 with StandardTocWriter

use of org.apache.nifi.provenance.toc.StandardTocWriter in project nifi by apache.

the class TestEventIdFirstSchemaRecordReaderWriter method testContentClaimRemoved.

/**
 * Round-trips a single RECEIVE event whose previous content claim is populated but whose
 * current content claim has been cleared (null container/section/identifier, zero offset and
 * size), then verifies that the recovered event reports the previous claim intact and the
 * current claim as absent.
 *
 * @throws IOException if the journal or TOC file cannot be written, read, or cleaned up
 */
@Test
public void testContentClaimRemoved() throws IOException {
    final File journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite.gz");
    final File tocFile = TocUtil.getTocFile(journalFile);

    final Map<String, String> attributes = new HashMap<>();
    attributes.put("filename", "1.txt");
    attributes.put("uuid", UUID.randomUUID().toString());

    final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventTime(System.currentTimeMillis());
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    builder.fromFlowFile(TestUtil.createFlowFile(3L, 3000L, attributes));
    builder.setComponentId("1234");
    builder.setComponentType("dummy processor");
    builder.setPreviousContentClaim("container-1", "section-1", "identifier-1", 1L, 1L);
    // The scenario under test: the event no longer carries a current content claim.
    builder.setCurrentContentClaim(null, null, null, 0L, 0L);
    final ProvenanceEventRecord record = builder.build();

    // try-with-resources guarantees the writer is closed even if writing the header or record fails.
    final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);
    try (final RecordWriter writer = createWriter(journalFile, tocWriter, true, 8192)) {
        writer.writeHeader(1L);
        writer.writeRecord(record);
    }

    // The TOC reader is managed alongside the stream and the record reader so that no file
    // handle outlives the test, whether or not an assertion fails.
    try (final FileInputStream fis = new FileInputStream(journalFile);
        final TocReader tocReader = new StandardTocReader(tocFile);
        final RecordReader reader = createReader(fis, journalFile.getName(), tocReader, 2048)) {
        assertEquals(0, reader.getBlockIndex());
        reader.skipToBlock(0);

        final StandardProvenanceEventRecord recovered = reader.nextRecord();
        assertNotNull(recovered);
        assertEquals("nifi://unit-test", recovered.getTransitUri());

        // Previous claim must come back exactly as written; current claim must be absent.
        assertEquals("container-1", recovered.getPreviousContentClaimContainer());
        assertNull(recovered.getContentClaimContainer());
        assertEquals("section-1", recovered.getPreviousContentClaimSection());
        assertNull(recovered.getContentClaimSection());
        assertEquals("identifier-1", recovered.getPreviousContentClaimIdentifier());
        assertNull(recovered.getContentClaimIdentifier());
        assertEquals(1L, recovered.getPreviousContentClaimOffset().longValue());
        assertNull(recovered.getContentClaimOffset());
        assertEquals(1L, recovered.getPreviousFileSize().longValue());
        assertEquals(0L, recovered.getFileSize());

        // Only one record was written, so the journal must now be exhausted.
        assertNull(reader.nextRecord());
    }

    // Remove the per-test storage directory created for the journal and TOC files.
    FileUtils.deleteFile(journalFile.getParentFile(), true);
}
Also used : TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) HashMap(java.util.HashMap) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) FileInputStream(java.io.FileInputStream) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) File(java.io.File) Test(org.junit.Test)

Example 7 with StandardTocWriter

use of org.apache.nifi.provenance.toc.StandardTocWriter in project nifi by apache.

the class TestSchemaRecordReaderWriter method testFieldRemovedFromSchema.

/**
 * Writes two events with a schema derived from the V1 provenance event schema minus the
 * "updated attributes" and "previous attributes" fields, then reads them back and checks that
 * each recovered event is a RECEIVE event with a non-null, empty updated-attributes map.
 *
 * @throws IOException if the journal or TOC file cannot be written or read
 */
@Test
public void testFieldRemovedFromSchema() throws IOException {
    final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);
    try {
        // Start from the V1 field list and drop the two attribute-map fields.
        final List<RecordField> remainingFields = new ArrayList<>(ProvenanceEventSchema.PROVENANCE_EVENT_SCHEMA_V1.getFields());
        remainingFields.remove(new SimpleRecordField(EventFieldNames.UPDATED_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        remainingFields.remove(new SimpleRecordField(EventFieldNames.PREVIOUS_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        final RecordSchema recordSchema = new RecordSchema(remainingFields);

        // The writer is subclassed so that both the header and every serialized event use the
        // reduced schema; otherwise the attribute fields would still be written out.
        final ByteArraySchemaRecordWriter writer = new ByteArraySchemaRecordWriter(journalFile, idGenerator, tocWriter, false, 0) {

            @Override
            public void writeHeader(long firstEventId, DataOutputStream out) throws IOException {
                // Emit the reduced schema preceded by its length in bytes.
                final ByteArrayOutputStream schemaBytes = new ByteArrayOutputStream();
                recordSchema.writeTo(schemaBytes);
                out.writeInt(schemaBytes.size());
                schemaBytes.writeTo(out);
            }

            @Override
            protected Record createRecord(final ProvenanceEventRecord event, final long eventId) {
                // The content-claim sub-schema is taken from the reduced schema's CONTENT_CLAIM field.
                return new EventRecord(event, eventId, recordSchema,
                    new RecordSchema(recordSchema.getField(EventFieldNames.CONTENT_CLAIM).getSubFields()));
            }
        };

        try {
            writer.writeHeader(1L);
            for (int written = 0; written < 2; written++) {
                writer.writeRecord(createEvent());
            }
        } finally {
            writer.close();
        }
    } finally {
        tocWriter.close();
    }

    // Read both events back and confirm they carry the expected information.
    try (final InputStream journalStream = new FileInputStream(journalFile);
        final TocReader tocReader = new StandardTocReader(tocFile);
        final RecordReader reader = createReader(journalStream, journalFile.getName(), tocReader, 10000)) {
        int verified = 0;
        while (verified < 2) {
            final StandardProvenanceEventRecord recovered = reader.nextRecord();
            assertNotNull(recovered);
            assertEquals(ProvenanceEventType.RECEIVE, recovered.getEventType());
            // The updated-attributes map is still present, because the Provenance Event Builder
            // creates an empty map, but it must contain nothing.
            assertNotNull(recovered.getUpdatedAttributes());
            assertTrue(recovered.getUpdatedAttributes().isEmpty());
            verified++;
        }
    }
}
Also used : TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordField(org.apache.nifi.repository.schema.RecordField) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) DataOutputStream(java.io.DataOutputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) NopTocWriter(org.apache.nifi.provenance.toc.NopTocWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) EventRecord(org.apache.nifi.provenance.schema.EventRecord) RecordSchema(org.apache.nifi.repository.schema.RecordSchema) Test(org.junit.Test)

Example 8 with StandardTocWriter

use of org.apache.nifi.provenance.toc.StandardTocWriter in project nifi by apache.

the class TestSchemaRecordReaderWriter method testPerformanceOfRandomAccessReads.

/**
 * Profiling harness, ignored by default because it never terminates. Writes 100,000 events to
 * a fresh journal, then endlessly repeats batches of 1,000 passes; each pass opens a new reader
 * and, for a fixed set of event IDs, skips to the event and reads it through the {@code time}
 * helper. The elapsed wall-clock time of every batch is printed to standard output.
 *
 * @throws Exception if writing or reading the journal fails
 */
@Test
@Ignore("runs forever for performance analysis/profiling")
public void testPerformanceOfRandomAccessReads() throws Exception {
    journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testPerformanceOfRandomAccessReads.gz");
    tocFile = TocUtil.getTocFile(journalFile);

    // Populate the journal that the read loop below will hit repeatedly.
    try (final RecordWriter writer = createWriter(journalFile, new StandardTocWriter(tocFile, true, false), true, 1024 * 32)) {
        writer.writeHeader(0L);
        int remaining = 100_000;
        while (remaining > 0) {
            writer.writeRecord(createEvent());
            remaining--;
        }
    }

    final long[] eventIds = { 4, 80, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 40_000, 80_000, 99_000 };

    // Intentionally endless: the test exists only to be attached to with a profiler.
    while (true) {
        final long batchStart = System.nanoTime();

        for (int pass = 0; pass < 1000; pass++) {
            try (final InputStream journalStream = new FileInputStream(journalFile);
                final RecordReader reader = createReader(journalStream, journalFile.getName(), new StandardTocReader(tocFile), 32 * 1024)) {
                for (final long eventId : eventIds) {
                    time(() -> {
                        reader.skipToEvent(eventId);
                        return reader.nextRecord();
                    }, eventId);
                }
            }
        }

        final long elapsedNanos = System.nanoTime() - batchStart;
        final long ms = TimeUnit.NANOSECONDS.toMillis(elapsedNanos);
        System.out.println(ms + " ms total");
    }
}
Also used : StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) File(java.io.File) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 9 with StandardTocWriter

use of org.apache.nifi.provenance.toc.StandardTocWriter in project nifi by apache.

the class TestPartitionedWriteAheadEventStore method testPerformanceOfAccessingEvents.

/**
 * Profiling harness, ignored by default because it never terminates. Builds a
 * {@code PartitionedWriteAheadEventStore}, adds 100,000 events one at a time, and then loops
 * forever: 100 timed fetches of a fixed list of event IDs followed by a one-second pause.
 *
 * @throws Exception if the store cannot be initialized, written to, or read from
 */
@Test
@Ignore
public void testPerformanceOfAccessingEvents() throws Exception {
    // Writers get a TOC writer only when the store asks for one.
    final RecordWriterFactory recordWriterFactory = (journal, ids, compress, withToc) -> {
        final TocWriter tocWriter;
        if (withToc) {
            tocWriter = new StandardTocWriter(TocUtil.getTocFile(journal), false, false);
        } else {
            tocWriter = null;
        }
        return new EventIdFirstSchemaRecordWriter(journal, ids, tocWriter, compress, 1024 * 1024, IdentifierLookup.EMPTY);
    };
    final RecordReaderFactory recordReaderFactory = (journal, allLogs, maxAttrChars) -> RecordReaders.newRecordReader(journal, allLogs, maxAttrChars);

    final PartitionedWriteAheadEventStore store = new PartitionedWriteAheadEventStore(createConfig(), recordWriterFactory, recordReaderFactory, EventReporter.NO_OP, new EventFileManager());
    store.initialize();
    // A freshly initialized store is expected to report -1 as its max event ID.
    assertEquals(-1, store.getMaxEventId());

    for (int added = 0; added < 100_000; added++) {
        store.addEvents(Collections.singleton(createEvent()));
    }

    final List<Long> eventIdList = Arrays.asList(4L, 80L, 1024L, 40_000L, 80_000L, 99_000L);

    // Intentionally endless: the test exists only to be observed with a profiler.
    for (;;) {
        for (int fetch = 0; fetch < 100; fetch++) {
            time(() -> store.getEvents(eventIdList, EventAuthorizer.GRANT_ALL, EventTransformer.EMPTY_TRANSFORMER), "Fetch Events");
        }
        Thread.sleep(1000L);
    }
}
Also used : Arrays(java.util.Arrays) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) AccessDeniedException(org.apache.nifi.authorization.AccessDeniedException) IdentifierLookup(org.apache.nifi.provenance.IdentifierLookup) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) RecordReaders(org.apache.nifi.provenance.serialization.RecordReaders) ArrayList(java.util.ArrayList) TocWriter(org.apache.nifi.provenance.toc.TocWriter) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) TestName(org.junit.rules.TestName) Map(java.util.Map) TocUtil(org.apache.nifi.provenance.toc.TocUtil) Before(org.junit.Before) EventTransformer(org.apache.nifi.provenance.authorization.EventTransformer) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) Assert.assertNotNull(org.junit.Assert.assertNotNull) ProvenanceEventType(org.apache.nifi.provenance.ProvenanceEventType) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Test(org.junit.Test) UUID(java.util.UUID) File(java.io.File) StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) TimeUnit(java.util.concurrent.TimeUnit) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) RecordWriters(org.apache.nifi.provenance.serialization.RecordWriters) Rule(org.junit.Rule) EventReporter(org.apache.nifi.events.EventReporter) Ignore(org.junit.Ignore) RepositoryConfiguration(org.apache.nifi.provenance.RepositoryConfiguration) Assert.assertFalse(org.junit.Assert.assertFalse) Collections(java.util.Collections) EventIdFirstSchemaRecordWriter(org.apache.nifi.provenance.EventIdFirstSchemaRecordWriter) Assert.assertEquals(org.junit.Assert.assertEquals) EventAuthorizer(org.apache.nifi.provenance.authorization.EventAuthorizer)

Example 10 with StandardTocWriter

use of org.apache.nifi.provenance.toc.StandardTocWriter in project nifi by apache.

the class TestWriteAheadStorePartition method testReindex.

/**
 * Adds 100 events to a {@code WriteAheadStorePartition}, stubs a mocked {@code EventIndex} to
 * report 18 as the minimum event ID to reindex for partition "1", and verifies that
 * {@code reindexLatestEvents} hands the index exactly 82 events whose IDs, once sorted, run
 * consecutively upward from 18.
 *
 * @throws IOException if the partition cannot write or read its event files
 */
@Test
@SuppressWarnings("unchecked")
public void testReindex() throws IOException {
    final RepositoryConfiguration repoConfig = createConfig(1, "testReindex");
    repoConfig.setMaxEventFileCount(5);

    // Use the first configured storage directory and its partition name.
    final Map<String, File> storageDirectories = repoConfig.getStorageDirectories();
    final String partitionName = storageDirectories.keySet().iterator().next();
    final File storageDirectory = storageDirectories.values().iterator().next();

    // Writers get a TOC writer only when the partition asks for one.
    final RecordWriterFactory recordWriterFactory = (journal, ids, compress, withToc) -> {
        final TocWriter tocWriter;
        if (withToc) {
            tocWriter = new StandardTocWriter(TocUtil.getTocFile(journal), false, false);
        } else {
            tocWriter = null;
        }
        return new EventIdFirstSchemaRecordWriter(journal, ids, tocWriter, compress, 32 * 1024, IdentifierLookup.EMPTY);
    };
    final RecordReaderFactory recordReaderFactory = (journal, allLogs, maxAttrChars) -> RecordReaders.newRecordReader(journal, allLogs, maxAttrChars);

    final WriteAheadStorePartition partition = new WriteAheadStorePartition(storageDirectory, partitionName, repoConfig, recordWriterFactory, recordReaderFactory, new LinkedBlockingQueue<>(), new AtomicLong(0L), EventReporter.NO_OP);
    for (int added = 0; added < 100; added++) {
        partition.addEvents(Collections.singleton(TestUtil.createEvent()));
    }

    // Everything the partition passes to EventIndex.reindexEvents is accumulated here.
    final Map<ProvenanceEventRecord, StorageSummary> reindexedEvents = new ConcurrentHashMap<>();
    final EventIndex eventIndex = Mockito.mock(EventIndex.class);
    final Answer<Object> captureReindexedEvents = new Answer<Object>() {

        @Override
        public Object answer(final InvocationOnMock invocation) throws Throwable {
            final Map<ProvenanceEventRecord, StorageSummary> batch = invocation.getArgumentAt(0, Map.class);
            reindexedEvents.putAll(batch);
            return null;
        }
    };
    Mockito.doAnswer(captureReindexedEvents).when(eventIndex).reindexEvents(Mockito.anyMap());
    Mockito.doReturn(18L).when(eventIndex).getMinimumEventIdToReindex("1");

    partition.reindexLatestEvents(eventIndex);

    final List<Long> eventIdsReindexed = reindexedEvents.values().stream()
        .map(StorageSummary::getEventId)
        .sorted()
        .collect(Collectors.toList());
    assertEquals(82, eventIdsReindexed.size());

    // The sorted IDs must form an unbroken run starting at the stubbed minimum of 18.
    int expectedId = 18;
    for (final Long reindexedId : eventIdsReindexed) {
        assertEquals(expectedId, reindexedId.intValue());
        expectedId++;
    }
}
Also used : StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) IdentifierLookup(org.apache.nifi.provenance.IdentifierLookup) RecordReaders(org.apache.nifi.provenance.serialization.RecordReaders) TocWriter(org.apache.nifi.provenance.toc.TocWriter) Answer(org.mockito.stubbing.Answer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) Map(java.util.Map) TocUtil(org.apache.nifi.provenance.toc.TocUtil) EventIndex(org.apache.nifi.provenance.index.EventIndex) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) IOException(java.io.IOException) Test(org.junit.Test) UUID(java.util.UUID) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Collectors(java.util.stream.Collectors) File(java.io.File) AtomicLong(java.util.concurrent.atomic.AtomicLong) Mockito(org.mockito.Mockito) List(java.util.List) EventReporter(org.apache.nifi.events.EventReporter) RepositoryConfiguration(org.apache.nifi.provenance.RepositoryConfiguration) TestUtil(org.apache.nifi.provenance.TestUtil) Collections(java.util.Collections) EventIdFirstSchemaRecordWriter(org.apache.nifi.provenance.EventIdFirstSchemaRecordWriter) Assert.assertEquals(org.junit.Assert.assertEquals)

Aggregations

StandardTocWriter (org.apache.nifi.provenance.toc.StandardTocWriter) 21, File (java.io.File) 18, TocWriter (org.apache.nifi.provenance.toc.TocWriter) 18, Test (org.junit.Test) 17, StandardTocReader (org.apache.nifi.provenance.toc.StandardTocReader) 15, RecordWriter (org.apache.nifi.provenance.serialization.RecordWriter) 14, FileInputStream (java.io.FileInputStream) 12, RecordReader (org.apache.nifi.provenance.serialization.RecordReader) 12, TocReader (org.apache.nifi.provenance.toc.TocReader) 12, HashMap (java.util.HashMap) 8, IOException (java.io.IOException) 6, ArrayList (java.util.ArrayList) 5, RecordReaders (org.apache.nifi.provenance.serialization.RecordReaders) 5, TocUtil (org.apache.nifi.provenance.toc.TocUtil) 5, Collections (java.util.Collections) 4, List (java.util.List) 4, EventReporter (org.apache.nifi.events.EventReporter) 4, Ignore (org.junit.Ignore) 4, InputStream (java.io.InputStream) 3, Map (java.util.Map) 3