Search in sources :

Example 26 with RecordReader

Use of org.apache.nifi.provenance.serialization.RecordReader in the Apache NiFi project.

In class TestEventIdFirstSchemaRecordReaderWriter, method testContentClaimChanged.

/**
 * Writes a single RECEIVE event whose current content claim differs from its previous
 * content claim, reads it back from the journal, and checks that the container, section,
 * identifier, offset and size of both claims are recovered.
 *
 * @throws IOException if the journal or its table-of-contents file cannot be written or read
 */
@Test
public void testContentClaimChanged() throws IOException {
    final File journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite.gz");
    final File tocFile = TocUtil.getTocFile(journalFile);
    try {
        final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);

        final Map<String, String> attributes = new HashMap<>();
        attributes.put("filename", "1.txt");
        attributes.put("uuid", UUID.randomUUID().toString());

        final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
        builder.setEventTime(System.currentTimeMillis());
        builder.setEventType(ProvenanceEventType.RECEIVE);
        builder.setTransitUri("nifi://unit-test");
        builder.fromFlowFile(TestUtil.createFlowFile(3L, 3000L, attributes));
        builder.setComponentId("1234");
        builder.setComponentType("dummy processor");
        builder.setPreviousContentClaim("container-1", "section-1", "identifier-1", 1L, 1L);
        builder.setCurrentContentClaim("container-2", "section-2", "identifier-2", 2L, 2L);
        final ProvenanceEventRecord record = builder.build();

        // try-with-resources closes the writer even when writing the header or record fails.
        try (final RecordWriter writer = createWriter(journalFile, tocWriter, true, 8192)) {
            writer.writeHeader(1L);
            writer.writeRecord(record);
        }

        try (final FileInputStream fis = new FileInputStream(journalFile);
            final TocReader tocReader = new StandardTocReader(tocFile);
            final RecordReader reader = createReader(fis, journalFile.getName(), tocReader, 2048)) {
            assertEquals(0, reader.getBlockIndex());
            reader.skipToBlock(0);
            final StandardProvenanceEventRecord recovered = reader.nextRecord();
            assertNotNull(recovered);
            assertEquals("nifi://unit-test", recovered.getTransitUri());
            assertEquals("container-1", recovered.getPreviousContentClaimContainer());
            assertEquals("container-2", recovered.getContentClaimContainer());
            assertEquals("section-1", recovered.getPreviousContentClaimSection());
            assertEquals("section-2", recovered.getContentClaimSection());
            assertEquals("identifier-1", recovered.getPreviousContentClaimIdentifier());
            assertEquals("identifier-2", recovered.getContentClaimIdentifier());
            assertEquals(1L, recovered.getPreviousContentClaimOffset().longValue());
            assertEquals(2L, recovered.getContentClaimOffset().longValue());
            assertEquals(1L, recovered.getPreviousFileSize().longValue());
            // Check the current claim's size (built as 2L above) instead of asserting the offset twice.
            final long currentFileSize = recovered.getFileSize();
            assertEquals(2L, currentFileSize);
            // Only one record was written, so the reader must now be exhausted.
            assertNull(reader.nextRecord());
        }
    } finally {
        // Remove the per-test storage directory even when an assertion above fails.
        FileUtils.deleteFile(journalFile.getParentFile(), true);
    }
}
Also used : TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) HashMap(java.util.HashMap) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) FileInputStream(java.io.FileInputStream) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) File(java.io.File) Test(org.junit.Test)

Example 27 with RecordReader

Use of org.apache.nifi.provenance.serialization.RecordReader in the Apache NiFi project.

In class TestEventIdFirstSchemaRecordReaderWriter, method testContentClaimUnchanged.

/**
 * Writes a single RECEIVE event whose current content claim is identical to its previous
 * content claim, reads it back from the journal, and checks that the container, section,
 * identifier, offset and size of both claims are recovered.
 *
 * @throws IOException if the journal or its table-of-contents file cannot be written or read
 */
@Test
public void testContentClaimUnchanged() throws IOException {
    final File journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite.gz");
    final File tocFile = TocUtil.getTocFile(journalFile);
    try {
        final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);

        final Map<String, String> attributes = new HashMap<>();
        attributes.put("filename", "1.txt");
        attributes.put("uuid", UUID.randomUUID().toString());

        final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
        builder.setEventTime(System.currentTimeMillis());
        builder.setEventType(ProvenanceEventType.RECEIVE);
        builder.setTransitUri("nifi://unit-test");
        builder.fromFlowFile(TestUtil.createFlowFile(3L, 3000L, attributes));
        builder.setComponentId("1234");
        builder.setComponentType("dummy processor");
        // Previous and current claims are deliberately the same for this test.
        builder.setPreviousContentClaim("container-1", "section-1", "identifier-1", 1L, 1L);
        builder.setCurrentContentClaim("container-1", "section-1", "identifier-1", 1L, 1L);
        final ProvenanceEventRecord record = builder.build();

        // try-with-resources closes the writer even when writing the header or record fails.
        try (final RecordWriter writer = createWriter(journalFile, tocWriter, true, 8192)) {
            writer.writeHeader(1L);
            writer.writeRecord(record);
        }

        try (final FileInputStream fis = new FileInputStream(journalFile);
            final TocReader tocReader = new StandardTocReader(tocFile);
            final RecordReader reader = createReader(fis, journalFile.getName(), tocReader, 2048)) {
            assertEquals(0, reader.getBlockIndex());
            reader.skipToBlock(0);
            final StandardProvenanceEventRecord recovered = reader.nextRecord();
            assertNotNull(recovered);
            assertEquals("nifi://unit-test", recovered.getTransitUri());
            assertEquals("container-1", recovered.getPreviousContentClaimContainer());
            assertEquals("container-1", recovered.getContentClaimContainer());
            assertEquals("section-1", recovered.getPreviousContentClaimSection());
            assertEquals("section-1", recovered.getContentClaimSection());
            assertEquals("identifier-1", recovered.getPreviousContentClaimIdentifier());
            assertEquals("identifier-1", recovered.getContentClaimIdentifier());
            assertEquals(1L, recovered.getPreviousContentClaimOffset().longValue());
            assertEquals(1L, recovered.getContentClaimOffset().longValue());
            assertEquals(1L, recovered.getPreviousFileSize().longValue());
            // Check the current claim's size (built as 1L above) instead of asserting the offset twice.
            final long currentFileSize = recovered.getFileSize();
            assertEquals(1L, currentFileSize);
            // Only one record was written, so the reader must now be exhausted.
            assertNull(reader.nextRecord());
        }
    } finally {
        // Remove the per-test storage directory even when an assertion above fails.
        FileUtils.deleteFile(journalFile.getParentFile(), true);
    }
}
Also used : TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) HashMap(java.util.HashMap) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) FileInputStream(java.io.FileInputStream) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) File(java.io.File) Test(org.junit.Test)

Example 28 with RecordReader

Use of org.apache.nifi.provenance.serialization.RecordReader in the Apache NiFi project.

In class TestSchemaRecordReaderWriter, method testFieldAddedToSchema.

/**
 * Writes two events through a schema writer whose schema has been extended with an extra
 * "Unit Test Field" string field (value "hello"), then reads both events back and checks
 * their component id, event type and updated attributes.
 *
 * @throws IOException if the journal cannot be written or read
 */
@Test
public void testFieldAddedToSchema() throws IOException {
    final RecordField extraField = new SimpleRecordField("Unit Test Field", FieldType.STRING, Repetition.EXACTLY_ONE);

    // Value to store for the extra field on every written record.
    final Map<RecordField, Object> extraValues = new HashMap<>();
    extraValues.put(extraField, "hello");

    // Appends the extra field to whatever field list the writer hands over.
    final Consumer<List<RecordField>> appendExtraField = schemaFields -> schemaFields.add(extraField);

    try (final ByteArraySchemaRecordWriter schemaWriter = createSchemaWriter(appendExtraField, extraValues)) {
        schemaWriter.writeHeader(1L);
        for (int written = 0; written < 2; written++) {
            schemaWriter.writeRecord(createEvent());
        }
    }

    try (final InputStream journalIn = new FileInputStream(journalFile);
        final TocReader toc = new StandardTocReader(tocFile);
        final RecordReader recordReader = createReader(journalIn, journalFile.getName(), toc, 10000)) {
        // Exactly two events were written above; each must come back populated.
        int remaining = 2;
        while (remaining-- > 0) {
            final StandardProvenanceEventRecord recovered = recordReader.nextRecord();
            assertNotNull(recovered);
            assertEquals("1234", recovered.getComponentId());
            assertEquals(ProvenanceEventType.RECEIVE, recovered.getEventType());
            assertNotNull(recovered.getUpdatedAttributes());
            assertFalse(recovered.getUpdatedAttributes().isEmpty());
        }
    }
}
Also used : StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocReader(org.apache.nifi.provenance.toc.TocReader) Record(org.apache.nifi.repository.schema.Record) ByteArrayOutputStream(java.io.ByteArrayOutputStream) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) ArrayList(java.util.ArrayList) NopTocWriter(org.apache.nifi.provenance.toc.NopTocWriter) RecordSchema(org.apache.nifi.repository.schema.RecordSchema) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) TocWriter(org.apache.nifi.provenance.toc.TocWriter) FieldType(org.apache.nifi.repository.schema.FieldType) DataOutputStream(java.io.DataOutputStream) Map(java.util.Map) TocUtil(org.apache.nifi.provenance.toc.TocUtil) Repetition(org.apache.nifi.repository.schema.Repetition) Before(org.junit.Before) OutputStream(java.io.OutputStream) NullOutputStream(org.apache.nifi.stream.io.NullOutputStream) Assert.assertNotNull(org.junit.Assert.assertNotNull) EventFieldNames(org.apache.nifi.provenance.schema.EventFieldNames) RecordField(org.apache.nifi.repository.schema.RecordField) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Test(org.junit.Test) FileInputStream(java.io.FileInputStream) UUID(java.util.UUID) File(java.io.File) FieldMapRecord(org.apache.nifi.repository.schema.FieldMapRecord) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) Assert.assertNull(org.junit.Assert.assertNull) EventRecord(org.apache.nifi.provenance.schema.EventRecord) Ignore(org.junit.Ignore) Assert.assertFalse(org.junit.Assert.assertFalse) ProvenanceEventSchema(org.apache.nifi.provenance.schema.ProvenanceEventSchema) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) Assert(org.junit.Assert) 
Assert.assertEquals(org.junit.Assert.assertEquals) InputStream(java.io.InputStream) TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordField(org.apache.nifi.repository.schema.RecordField) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) HashMap(java.util.HashMap) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) FileInputStream(java.io.FileInputStream) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) ArrayList(java.util.ArrayList) List(java.util.List) Test(org.junit.Test)

Example 29 with RecordReader

Use of org.apache.nifi.provenance.serialization.RecordReader in the Apache NiFi project.

In class TestSchemaRecordReaderWriter, method testReadPerformance.

/**
 * Local micro-benchmark: serializes one event once, then repeatedly deserializes it from a
 * looping in-memory stream and prints the elapsed time and throughput.
 *
 * @throws IOException if serializing the header/record or reading the records fails
 * @throws InterruptedException declared by the original signature; nothing in this body throws it
 */
@Test
@Ignore("For local performance testing only")
public void testReadPerformance() throws IOException, InterruptedException {
    // This is a simple micro-benchmarking test so that we can determine how fast the serialization/deserialization is before
    // making significant changes. This allows us to ensure that changes that we make do not have significant adverse effects
    // on performance of the repository.
    final ProvenanceEventRecord event = createEvent();
    final TocReader tocReader = null;

    // Capture the bytes produced by writing only the header.
    final byte[] header;
    try (final ByteArrayOutputStream headerOut = new ByteArrayOutputStream();
        final DataOutputStream out = new DataOutputStream(headerOut);
        // Managed as a resource so the writer is closed, matching the record writer below.
        final RecordWriter schemaWriter = new ByteArraySchemaRecordWriter(out, "out", idGenerator, null, false, 0)) {
        schemaWriter.writeHeader(1L);
        header = headerOut.toByteArray();
    }

    // Capture the bytes of a single serialized record, without the header.
    final byte[] serializedRecord;
    try (final ByteArrayOutputStream headerOut = new ByteArrayOutputStream();
        final RecordWriter writer = new ByteArraySchemaRecordWriter(headerOut, "out", idGenerator, null, false, 0)) {
        writer.writeHeader(1L);
        // Discard the header bytes so that only the record remains in the buffer.
        headerOut.reset();
        writer.writeRecord(event);
        writer.flush();
        serializedRecord = headerOut.toByteArray();
    }

    final int numEvents = 10_000_000;
    final int recordBytes = serializedRecord.length;
    // Widen before multiplying so the byte total cannot overflow an int.
    final long totalRecordBytes = (long) recordBytes * (long) numEvents;

    final long startNanos = System.nanoTime();
    try (final InputStream in = new LoopingInputStream(header, serializedRecord);
        final RecordReader reader = new ByteArraySchemaRecordReader(in, "filename", tocReader, 100000)) {
        for (int i = 0; i < numEvents; i++) {
            reader.nextRecord();
        }
    }
    final long nanos = System.nanoTime() - startNanos;

    final long millis = TimeUnit.NANOSECONDS.toMillis(nanos);
    final double seconds = millis / 1000D;
    final long bytesPerSecond = (long) (totalRecordBytes / seconds);
    final long megaBytesPerSecond = bytesPerSecond / 1024 / 1024;
    System.out.println("Took " + millis + " millis to read " + numEvents + " events or " + megaBytesPerSecond + " MB/sec");
}
Also used : TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) DataOutputStream(java.io.DataOutputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) ByteArrayOutputStream(java.io.ByteArrayOutputStream) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 30 with RecordReader

Use of org.apache.nifi.provenance.serialization.RecordReader in the Apache NiFi project.

In class TestStandardRecordReaderWriter, method testReadPerformance.

/**
 * Local micro-benchmark: serializes one event once, then repeatedly deserializes it from a
 * looping in-memory stream and prints how long the reads took.
 *
 * @throws IOException if serializing the header/record or reading the records fails
 */
@Test
@Ignore("For local testing only")
public void testReadPerformance() throws IOException {
    // Micro-benchmark for serialization/deserialization speed, intended to be run by hand before and
    // after significant changes so that performance regressions in the repository are noticed.
    final ProvenanceEventRecord sampleEvent = createEvent();
    final TocReader noToc = null;

    // Hand-written header: the repository class name followed by the int 9.
    final byte[] headerBytes;
    try (final ByteArrayOutputStream headerBuffer = new ByteArrayOutputStream();
        final DataOutputStream headerData = new DataOutputStream(headerBuffer)) {
        headerData.writeUTF(PersistentProvenanceRepository.class.getName());
        headerData.writeInt(9);
        headerBytes = headerBuffer.toByteArray();
    }

    // Bytes of a single serialized record, without the writer's own header.
    final byte[] recordBytes;
    try (final ByteArrayOutputStream recordBuffer = new ByteArrayOutputStream();
        final StandardRecordWriter recordWriter = new StandardRecordWriter(recordBuffer, "devnull", idGenerator, null, false, 0)) {
        recordWriter.writeHeader(1L);
        // Drop what writeHeader emitted so only the record is captured.
        recordBuffer.reset();
        recordWriter.writeRecord(sampleEvent);
        recordWriter.flush();
        recordBytes = recordBuffer.toByteArray();
    }

    final int numEvents = 10_000_000;
    final long begin = System.nanoTime();
    try (final InputStream loopingIn = new LoopingInputStream(headerBytes, recordBytes);
        final RecordReader recordReader = new StandardRecordReader(loopingIn, "filename", noToc, 100000)) {
        int pending = numEvents;
        while (pending-- > 0) {
            recordReader.nextRecord();
        }
    }
    final long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - begin);

    System.out.println("Took " + elapsedMillis + " millis to read " + numEvents + " events");
}
Also used : TocReader(org.apache.nifi.provenance.toc.TocReader) DataOutputStream(java.io.DataOutputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

RecordReader (org.apache.nifi.provenance.serialization.RecordReader)37 File (java.io.File)30 Test (org.junit.Test)18 FileInputStream (java.io.FileInputStream)16 IOException (java.io.IOException)16 RecordWriter (org.apache.nifi.provenance.serialization.RecordWriter)16 TocReader (org.apache.nifi.provenance.toc.TocReader)16 StandardTocReader (org.apache.nifi.provenance.toc.StandardTocReader)15 StandardTocWriter (org.apache.nifi.provenance.toc.StandardTocWriter)14 ArrayList (java.util.ArrayList)13 HashMap (java.util.HashMap)12 TocWriter (org.apache.nifi.provenance.toc.TocWriter)12 InputStream (java.io.InputStream)7 EOFException (java.io.EOFException)6 AtomicLong (java.util.concurrent.atomic.AtomicLong)6 Ignore (org.junit.Ignore)6 ByteArrayOutputStream (java.io.ByteArrayOutputStream)5 DataOutputStream (java.io.DataOutputStream)5 Path (java.nio.file.Path)5 Callable (java.util.concurrent.Callable)5