Search in sources :

Example 1 with RecordReader

use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

the class AbstractTestRecordReaderWriter method testMultipleRecordsMultipleBlocksCompressed.

/**
 * Writes ten events to a journal using a very small block size and verifies that all ten
 * can be read back in sequence (event ids 0 through 9) with the expected transit URI and
 * updated attributes, and that no further record follows them.
 *
 * @throws IOException if the journal or its table-of-contents file cannot be written or read
 */
@Test
public void testMultipleRecordsMultipleBlocksCompressed() throws IOException {
    final File journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite.gz");
    final File tocFile = TocUtil.getTocFile(journalFile);
    final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);
    // Block size of 100: kept tiny, presumably so the ten records span several blocks
    // (NOTE(review): confirm the unit of this argument against createWriter).
    // try-with-resources guarantees the writer is closed even if a write fails.
    try (final RecordWriter writer = createWriter(journalFile, tocWriter, true, 100)) {
        writer.writeHeader(1L);
        for (int i = 0; i < 10; i++) {
            writer.writeRecord(createEvent());
        }
    }
    final TocReader tocReader = new StandardTocReader(tocFile);
    try (final FileInputStream fis = new FileInputStream(journalFile);
        final RecordReader reader = createReader(fis, journalFile.getName(), tocReader, 2048)) {
        for (int i = 0; i < 10; i++) {
            final StandardProvenanceEventRecord recovered = reader.nextRecord();
            assertNotNull(recovered);
            // Records must come back in the order they were written.
            assertEquals(i, recovered.getEventId());
            assertEquals("nifi://unit-test", recovered.getTransitUri());
            final Map<String, String> updatedAttrs = recovered.getUpdatedAttributes();
            assertNotNull(updatedAttrs);
            assertEquals(2, updatedAttrs.size());
            assertEquals("1.txt", updatedAttrs.get("filename"));
            assertTrue(updatedAttrs.containsKey("uuid"));
        }
        // Exactly ten records were written, so the eleventh read must yield nothing.
        assertNull(reader.nextRecord());
    }
    FileUtils.deleteFile(journalFile.getParentFile(), true);
}
Also used : StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) File(java.io.File) FileInputStream(java.io.FileInputStream) Test(org.junit.Test)

Example 2 with RecordReader

use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

the class TestEventIdFirstSchemaRecordReaderWriter method testPerformanceOfRandomAccessReads.

/**
 * Manual benchmark (disabled via {@code @Ignore}): writes 100,000 events to a journal and
 * then, in an endless loop, repeatedly opens a reader and times a skip-to-event followed by
 * a read for a fixed set of event ids. The total elapsed milliseconds for each batch of
 * 1000 reader passes is printed to standard output.
 *
 * @throws Exception if writing or reading the journal fails
 */
@Test
@Ignore
public void testPerformanceOfRandomAccessReads() throws Exception {
    final int blockSize = 1024 * 32;
    journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testPerformanceOfRandomAccessReads.gz");
    tocFile = TocUtil.getTocFile(journalFile);

    // Populate the journal that the timed reads below will run against.
    try (final RecordWriter writer = createWriter(journalFile, new StandardTocWriter(tocFile, true, false), true, blockSize)) {
        writer.writeHeader(0L);
        int written = 0;
        while (written < 100_000) {
            writer.writeRecord(createEvent());
            written++;
        }
    }

    final long[] eventIds = { 4, 80, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 40_000, 80_000, 99_000 };

    // Never cleared: this test is meant to be run by hand and stopped manually.
    boolean keepRunning = true;
    while (keepRunning) {
        final long startNanos = System.nanoTime();
        for (int pass = 0; pass < 1000; pass++) {
            try (final InputStream in = new FileInputStream(journalFile);
                final RecordReader reader = createReader(in, journalFile.getName(), new StandardTocReader(tocFile), 32 * 1024)) {
                for (int idx = 0; idx < eventIds.length; idx++) {
                    final long eventId = eventIds[idx];
                    time(() -> {
                        reader.skipToEvent(eventId);
                        return reader.nextRecord();
                    }, eventId);
                }
            }
        }
        final long elapsedNanos = System.nanoTime() - startNanos;
        final long ms = TimeUnit.NANOSECONDS.toMillis(elapsedNanos);
        System.out.println(ms + " ms total");
    }
}
Also used : StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) File(java.io.File) FileInputStream(java.io.FileInputStream) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 3 with RecordReader

use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

the class TestEventIdFirstSchemaRecordReaderWriter method testComponentIdInlineAndLookup.

/**
 * Writes two events through an {@code EventIdFirstSchemaRecordWriter} configured with an
 * {@link IdentifierLookup} that knows one component identifier ("1234") and one component
 * type ("unit-test-component-1"), then verifies both events are read back unchanged.
 * <p>
 * The first event uses a known component id with an unknown component type; the second uses
 * an unknown component id with a known component type. Presumably this exercises both the
 * lookup-based and the inline encoding of those fields (NOTE(review): confirm against the
 * writer implementation).
 *
 * @throws IOException if the journal or its table-of-contents file cannot be written or read
 */
@Test
public void testComponentIdInlineAndLookup() throws IOException {
    final File journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite.prov");
    final File tocFile = TocUtil.getTocFile(journalFile);
    final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);
    final IdentifierLookup lookup = new IdentifierLookup() {

        @Override
        public List<String> getQueueIdentifiers() {
            return Collections.emptyList();
        }

        @Override
        public List<String> getComponentTypes() {
            return Collections.singletonList("unit-test-component-1");
        }

        @Override
        public List<String> getComponentIdentifiers() {
            return Collections.singletonList("1234");
        }
    };
    final Map<String, String> attributes = new HashMap<>();
    attributes.put("filename", "1.txt");
    attributes.put("uuid", UUID.randomUUID().toString());
    final StandardProvenanceEventRecord.Builder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventId(1_000_000L);
    builder.setEventTime(System.currentTimeMillis());
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    builder.fromFlowFile(TestUtil.createFlowFile(3L, 3000L, attributes));
    builder.setComponentId("1234");
    builder.setComponentType("unit-test-component-2");
    builder.setPreviousContentClaim("container-1", "section-1", "identifier-1", 1L, 1L);
    builder.setCurrentContentClaim("container-2", "section-2", "identifier-2", 2L, 2L);
    // try-with-resources guarantees the writer is closed even if a write fails.
    try (final RecordWriter writer = new EventIdFirstSchemaRecordWriter(journalFile, idGenerator, tocWriter, false, 1024 * 32, lookup)) {
        writer.writeHeader(500_000L);
        // First event: component id is known to the lookup, component type is not.
        writer.writeRecord(builder.build());
        // Second event: reuse the builder, flipping which of the two values the lookup knows.
        builder.setEventId(1_000_001L);
        builder.setComponentId("4444");
        builder.setComponentType("unit-test-component-1");
        writer.writeRecord(builder.build());
    }
    final TocReader tocReader = new StandardTocReader(tocFile);
    try (final FileInputStream fis = new FileInputStream(journalFile);
        final RecordReader reader = createReader(fis, journalFile.getName(), tocReader, 2048)) {
        ProvenanceEventRecord event = reader.nextRecord();
        assertNotNull(event);
        assertEquals(1_000_000L, event.getEventId());
        assertEquals("1234", event.getComponentId());
        assertEquals("unit-test-component-2", event.getComponentType());
        event = reader.nextRecord();
        assertNotNull(event);
        assertEquals(1_000_001L, event.getEventId());
        assertEquals("4444", event.getComponentId());
        assertEquals("unit-test-component-1", event.getComponentType());
        // Only two records were written.
        assertNull(reader.nextRecord());
    }
    FileUtils.deleteFile(journalFile.getParentFile(), true);
}
Also used : TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) HashMap(java.util.HashMap) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) FileInputStream(java.io.FileInputStream) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) File(java.io.File) Test(org.junit.Test)

Example 4 with RecordReader

use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

the class TestEventIdFirstSchemaRecordReaderWriter method testEventIdAndTimestampCorrect.

/**
 * Writes a single event with an explicit event id (1,000,000) and an event time ten seconds
 * in the past, then verifies the reader returns exactly that id and timestamp. The header is
 * written with a different value (500,000), so the recovered id must be the event's own.
 *
 * @throws IOException if the journal or its table-of-contents file cannot be written or read
 */
@Test
public void testEventIdAndTimestampCorrect() throws IOException {
    final File journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite.gz");
    final File tocFile = TocUtil.getTocFile(journalFile);
    final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);
    final Map<String, String> attributes = new HashMap<>();
    attributes.put("filename", "1.txt");
    attributes.put("uuid", UUID.randomUUID().toString());
    // Use a time that differs from "now" so a reader that substituted the current time would fail.
    final long timestamp = System.currentTimeMillis() - 10000L;
    final StandardProvenanceEventRecord.Builder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventId(1_000_000L);
    builder.setEventTime(timestamp);
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    builder.fromFlowFile(TestUtil.createFlowFile(3L, 3000L, attributes));
    builder.setComponentId("1234");
    builder.setComponentType("dummy processor");
    builder.setPreviousContentClaim("container-1", "section-1", "identifier-1", 1L, 1L);
    builder.setCurrentContentClaim("container-2", "section-2", "identifier-2", 2L, 2L);
    final ProvenanceEventRecord record = builder.build();
    // try-with-resources guarantees the writer is closed even if a write fails.
    try (final RecordWriter writer = createWriter(journalFile, tocWriter, true, 8192)) {
        writer.writeHeader(500_000L);
        writer.writeRecord(record);
    }
    final TocReader tocReader = new StandardTocReader(tocFile);
    try (final FileInputStream fis = new FileInputStream(journalFile);
        final RecordReader reader = createReader(fis, journalFile.getName(), tocReader, 2048)) {
        final ProvenanceEventRecord event = reader.nextRecord();
        assertNotNull(event);
        assertEquals(1_000_000L, event.getEventId());
        assertEquals(timestamp, event.getEventTime());
        // Only one record was written.
        assertNull(reader.nextRecord());
    }
    FileUtils.deleteFile(journalFile.getParentFile(), true);
}
Also used : TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) HashMap(java.util.HashMap) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) FileInputStream(java.io.FileInputStream) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) File(java.io.File) Test(org.junit.Test)

Example 5 with RecordReader

use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

the class TestEventIdFirstSchemaRecordReaderWriter method testContentClaimRemoved.

/**
 * Writes a single event that has a previous content claim but whose current content claim
 * is set to {@code null} values, then verifies the recovered event reports the previous
 * claim's container, section, identifier, offset and size while every current-claim
 * accessor returns {@code null} and the file size is zero.
 *
 * @throws IOException if the journal or its table-of-contents file cannot be written or read
 */
@Test
public void testContentClaimRemoved() throws IOException {
    final File journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite.gz");
    final File tocFile = TocUtil.getTocFile(journalFile);
    final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);
    final Map<String, String> attributes = new HashMap<>();
    attributes.put("filename", "1.txt");
    attributes.put("uuid", UUID.randomUUID().toString());
    final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventTime(System.currentTimeMillis());
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    builder.fromFlowFile(TestUtil.createFlowFile(3L, 3000L, attributes));
    builder.setComponentId("1234");
    builder.setComponentType("dummy processor");
    builder.setPreviousContentClaim("container-1", "section-1", "identifier-1", 1L, 1L);
    // Explicitly clear the current claim; this is the condition under test.
    builder.setCurrentContentClaim(null, null, null, 0L, 0L);
    final ProvenanceEventRecord record = builder.build();
    // try-with-resources guarantees the writer is closed even if a write fails.
    try (final RecordWriter writer = createWriter(journalFile, tocWriter, true, 8192)) {
        writer.writeHeader(1L);
        writer.writeRecord(record);
    }
    final TocReader tocReader = new StandardTocReader(tocFile);
    try (final FileInputStream fis = new FileInputStream(journalFile);
        final RecordReader reader = createReader(fis, journalFile.getName(), tocReader, 2048)) {
        assertEquals(0, reader.getBlockIndex());
        reader.skipToBlock(0);
        final StandardProvenanceEventRecord recovered = reader.nextRecord();
        assertNotNull(recovered);
        assertEquals("nifi://unit-test", recovered.getTransitUri());
        // Previous-claim fields must survive; current-claim fields must come back as null.
        assertEquals("container-1", recovered.getPreviousContentClaimContainer());
        assertNull(recovered.getContentClaimContainer());
        assertEquals("section-1", recovered.getPreviousContentClaimSection());
        assertNull(recovered.getContentClaimSection());
        assertEquals("identifier-1", recovered.getPreviousContentClaimIdentifier());
        assertNull(recovered.getContentClaimIdentifier());
        assertEquals(1L, recovered.getPreviousContentClaimOffset().longValue());
        assertNull(recovered.getContentClaimOffset());
        assertEquals(1L, recovered.getPreviousFileSize().longValue());
        assertEquals(0L, recovered.getFileSize());
        // Only one record was written.
        assertNull(reader.nextRecord());
    }
    FileUtils.deleteFile(journalFile.getParentFile(), true);
}
Also used : TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) HashMap(java.util.HashMap) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) FileInputStream(java.io.FileInputStream) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) File(java.io.File) Test(org.junit.Test)

Aggregations

RecordReader (org.apache.nifi.provenance.serialization.RecordReader): 37, File (java.io.File): 30, Test (org.junit.Test): 18, FileInputStream (java.io.FileInputStream): 16, IOException (java.io.IOException): 16, RecordWriter (org.apache.nifi.provenance.serialization.RecordWriter): 16, TocReader (org.apache.nifi.provenance.toc.TocReader): 16, StandardTocReader (org.apache.nifi.provenance.toc.StandardTocReader): 15, StandardTocWriter (org.apache.nifi.provenance.toc.StandardTocWriter): 14, ArrayList (java.util.ArrayList): 13, HashMap (java.util.HashMap): 12, TocWriter (org.apache.nifi.provenance.toc.TocWriter): 12, InputStream (java.io.InputStream): 7, EOFException (java.io.EOFException): 6, AtomicLong (java.util.concurrent.atomic.AtomicLong): 6, Ignore (org.junit.Ignore): 6, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 5, DataOutputStream (java.io.DataOutputStream): 5, Path (java.nio.file.Path): 5, Callable (java.util.concurrent.Callable): 5