Search in sources:

Example 6 with RecordReader

use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

the class TestPersistentProvenanceRepository method checkJournalRecords.

/**
 * Reads every regular file directly inside {@code storageDir} with a {@link RecordReader} and
 * checks each record's event ID against a counter that starts at zero and is incremented once
 * per record, carrying over from one file to the next.
 *
 * @param storageDir directory whose files are read
 * @param exact      if {@code true}, each event ID must equal the running counter; otherwise the
 *                   event ID must be greater than or equal to it
 * @return the total number of records read across all files
 * @throws IOException if a file cannot be opened or read as a journal
 */
private long checkJournalRecords(final File storageDir, final Boolean exact) throws IOException {
    // listFiles() returns null when the path is not a directory or cannot be listed.
    // NOTE(review): listFiles() makes no ordering guarantee, yet the counter carries across
    // files — confirm the iteration order matches the event ID order where this is used.
    final File[] storagefiles = storageDir.listFiles();
    assertNotNull(storagefiles);
    long counter = 0;
    for (final File file : storagefiles) {
        if (!file.isFile()) {
            // Sub-directories are not journals; skip them.
            continue;
        }
        try (RecordReader reader = RecordReaders.newRecordReader(file, null, 2048)) {
            ProvenanceEventRecord r;
            while ((r = reader.nextRecord()) != null) {
                if (exact) {
                    assertTrue(counter++ == r.getEventId());
                } else {
                    assertTrue(counter++ <= r.getEventId());
                }
            }
        }
    }
    return counter;
}
Also used : RecordReader(org.apache.nifi.provenance.serialization.RecordReader) TestUtil.createFlowFile(org.apache.nifi.provenance.TestUtil.createFlowFile) FlowFile(org.apache.nifi.flowfile.FlowFile) File(java.io.File)

Example 7 with RecordReader

use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

the class TestPersistentProvenanceRepository method testMergeJournals.

/**
 * Registers the same RECEIVE event 10,000 times from a pool of ten threads, waits for the
 * repository to roll over, and then verifies that the files in the storage directory hold
 * exactly 10,000 records whose event IDs count up from zero without gaps.
 * The test is skipped when {@code isWindowsEnvironment()} reports {@code true}.
 *
 * @throws IOException          if the storage directory or a journal file cannot be read
 * @throws InterruptedException declared by the test signature
 */
@Test
public void testMergeJournals() throws IOException, InterruptedException {
    assumeFalse(isWindowsEnvironment());
    final RepositoryConfiguration config = createConfiguration();
    config.setMaxEventFileLife(3, TimeUnit.SECONDS);
    repo = new PersistentProvenanceRepository(config, DEFAULT_ROLLOVER_MILLIS);
    repo.initialize(getEventReporter(), null, null, IdentifierLookup.EMPTY);
    final Map<String, String> attributes = new HashMap<>();
    final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventTime(System.currentTimeMillis());
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    attributes.put("uuid", "12345678-0000-0000-0000-012345678912");
    builder.fromFlowFile(createFlowFile(3L, 3000L, attributes));
    builder.setComponentId("1234");
    builder.setComponentType("dummy processor");
    final ProvenanceEventRecord record = builder.build();
    final ExecutorService exec = Executors.newFixedThreadPool(10);
    for (int i = 0; i < 10000; i++) {
        exec.submit(new Runnable() {

            @Override
            public void run() {
                repo.registerEvent(record);
            }
        });
    }
    // Release the pool's threads once the queued tasks finish. shutdown() does not block and
    // does not cancel tasks that were already submitted.
    exec.shutdown();
    repo.waitForRollover();
    final File storageDir = config.getStorageDirectories().values().iterator().next();
    final File[] storageFiles = storageDir.listFiles();
    if (storageFiles == null) {
        // listFiles() returns null when the path is not a directory or cannot be listed.
        throw new IOException("Unable to list files in storage directory " + storageDir);
    }
    long counter = 0;
    for (final File file : storageFiles) {
        if (file.isFile()) {
            try (RecordReader reader = RecordReaders.newRecordReader(file, null, 2048)) {
                ProvenanceEventRecord r;
                while ((r = reader.nextRecord()) != null) {
                    assertEquals(counter++, r.getEventId());
                }
            }
        }
    }
    assertEquals(10000, counter);
}
Also used : HashMap(java.util.HashMap) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) ExecutorService(java.util.concurrent.ExecutorService) TestUtil.createFlowFile(org.apache.nifi.provenance.TestUtil.createFlowFile) FlowFile(org.apache.nifi.flowfile.FlowFile) File(java.io.File) Test(org.junit.Test)

Example 8 with RecordReader

use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

the class TestSchemaRecordReaderWriter method testFieldRemovedFromSchema.

/**
 * Writes two events using a schema from which the updated-attributes and previous-attributes
 * fields have been removed, then reads them back and checks that each event is a RECEIVE event
 * whose updated-attributes map is present but empty.
 *
 * @throws IOException if writing or reading the journal or TOC file fails
 */
@Test
public void testFieldRemovedFromSchema() throws IOException {
    final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);
    try {
        // Create a schema that has the fields modified
        final RecordSchema schemaV1 = ProvenanceEventSchema.PROVENANCE_EVENT_SCHEMA_V1;
        final List<RecordField> fields = new ArrayList<>(schemaV1.getFields());
        fields.remove(new SimpleRecordField(EventFieldNames.UPDATED_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        fields.remove(new SimpleRecordField(EventFieldNames.PREVIOUS_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        final RecordSchema recordSchema = new RecordSchema(fields);
        // Create a record writer whose schema does not contain updated attributes or previous attributes.
        // This means that we must also override the method that writes out attributes so that we are able
        // to avoid actually writing them out.
        // The writer is managed by try-with-resources so that a failure in close() is recorded as a
        // suppressed exception instead of replacing a failure thrown while writing.
        try (final ByteArraySchemaRecordWriter writer = new ByteArraySchemaRecordWriter(journalFile, idGenerator, tocWriter, false, 0) {

            @Override
            public void writeHeader(long firstEventId, DataOutputStream out) throws IOException {
                // Write the reduced schema, prefixed by its serialized length.
                final ByteArrayOutputStream baos = new ByteArrayOutputStream();
                recordSchema.writeTo(baos);
                out.writeInt(baos.size());
                baos.writeTo(out);
            }

            @Override
            protected Record createRecord(final ProvenanceEventRecord event, final long eventId) {
                final RecordSchema contentClaimSchema = new RecordSchema(recordSchema.getField(EventFieldNames.CONTENT_CLAIM).getSubFields());
                return new EventRecord(event, eventId, recordSchema, contentClaimSchema);
            }
        }) {
            writer.writeHeader(1L);
            writer.writeRecord(createEvent());
            writer.writeRecord(createEvent());
        }
    } finally {
        tocWriter.close();
    }
    // Read the records in and make sure that they have the info that we expect.
    try (final InputStream in = new FileInputStream(journalFile);
        final TocReader tocReader = new StandardTocReader(tocFile);
        final RecordReader reader = createReader(in, journalFile.getName(), tocReader, 10000)) {
        for (int i = 0; i < 2; i++) {
            final StandardProvenanceEventRecord event = reader.nextRecord();
            assertNotNull(event);
            assertEquals(ProvenanceEventType.RECEIVE, event.getEventType());
            // We will still have a Map<String, String> for updated attributes because the
            // Provenance Event Builder will create an empty map.
            assertNotNull(event.getUpdatedAttributes());
            assertTrue(event.getUpdatedAttributes().isEmpty());
        }
    }
}
Also used : TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordField(org.apache.nifi.repository.schema.RecordField) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) DataOutputStream(java.io.DataOutputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) FileInputStream(java.io.FileInputStream) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) NopTocWriter(org.apache.nifi.provenance.toc.NopTocWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) EventRecord(org.apache.nifi.provenance.schema.EventRecord) RecordSchema(org.apache.nifi.repository.schema.RecordSchema) Test(org.junit.Test)

Example 9 with RecordReader

use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

the class TestSchemaRecordReaderWriter method testAddOneRecordReadTwice.

/**
 * Writes a single event through a schema writer that is given one additional string field
 * ("Unit Test Field" with the value "hello"), then reads the journal back and verifies that the
 * first read yields a record and the second read yields {@code null}.
 *
 * @throws IOException if writing or reading the journal or TOC file fails
 */
@Test
public void testAddOneRecordReadTwice() throws IOException {
    final RecordField extraField = new SimpleRecordField("Unit Test Field", FieldType.STRING, Repetition.EXACTLY_ONE);

    // Value to supply for the extra field when the record is created.
    final Map<RecordField, Object> extraValues = new HashMap<>();
    extraValues.put(extraField, "hello");

    // Appends the extra field to whatever field list the writer factory hands over.
    final Consumer<List<RecordField>> appendExtraField = schemaFields -> schemaFields.add(extraField);

    try (final ByteArraySchemaRecordWriter journalWriter = createSchemaWriter(appendExtraField, extraValues)) {
        journalWriter.writeHeader(1L);
        journalWriter.writeRecord(createEvent());
    }

    try (final InputStream journalStream = new FileInputStream(journalFile);
        final TocReader toc = new StandardTocReader(tocFile);
        final RecordReader journalReader = createReader(journalStream, journalFile.getName(), toc, 10000)) {
        // Exactly one record was written: the first read must succeed, the second must hit the end.
        assertNotNull(journalReader.nextRecord());
        assertNull(journalReader.nextRecord());
    }
}
Also used : StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocReader(org.apache.nifi.provenance.toc.TocReader) Record(org.apache.nifi.repository.schema.Record) ByteArrayOutputStream(java.io.ByteArrayOutputStream) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) ArrayList(java.util.ArrayList) NopTocWriter(org.apache.nifi.provenance.toc.NopTocWriter) RecordSchema(org.apache.nifi.repository.schema.RecordSchema) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) TocWriter(org.apache.nifi.provenance.toc.TocWriter) FieldType(org.apache.nifi.repository.schema.FieldType) DataOutputStream(java.io.DataOutputStream) Map(java.util.Map) TocUtil(org.apache.nifi.provenance.toc.TocUtil) Repetition(org.apache.nifi.repository.schema.Repetition) Before(org.junit.Before) OutputStream(java.io.OutputStream) NullOutputStream(org.apache.nifi.stream.io.NullOutputStream) Assert.assertNotNull(org.junit.Assert.assertNotNull) EventFieldNames(org.apache.nifi.provenance.schema.EventFieldNames) RecordField(org.apache.nifi.repository.schema.RecordField) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Test(org.junit.Test) FileInputStream(java.io.FileInputStream) UUID(java.util.UUID) File(java.io.File) FieldMapRecord(org.apache.nifi.repository.schema.FieldMapRecord) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) Assert.assertNull(org.junit.Assert.assertNull) EventRecord(org.apache.nifi.provenance.schema.EventRecord) Ignore(org.junit.Ignore) Assert.assertFalse(org.junit.Assert.assertFalse) ProvenanceEventSchema(org.apache.nifi.provenance.schema.ProvenanceEventSchema) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) Assert(org.junit.Assert) 
Assert.assertEquals(org.junit.Assert.assertEquals) InputStream(java.io.InputStream) TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordField(org.apache.nifi.repository.schema.RecordField) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) HashMap(java.util.HashMap) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) FileInputStream(java.io.FileInputStream) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) ArrayList(java.util.ArrayList) List(java.util.List) Test(org.junit.Test)

Example 10 with RecordReader

use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

the class TestSchemaRecordReaderWriter method testPerformanceOfRandomAccessReads.

/**
 * Profiling aid, ignored by default: writes 100,000 events to a fresh journal under
 * {@code target/storage/<random UUID>/} and then loops without end. Each pass opens the journal
 * 1,000 times, skips to each of a fixed set of event IDs, reads the record there, and finally
 * prints the elapsed milliseconds for the pass.
 *
 * @throws Exception if writing or reading the journal fails
 */
@Test
@Ignore("runs forever for performance analysis/profiling")
public void testPerformanceOfRandomAccessReads() throws Exception {
    journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testPerformanceOfRandomAccessReads.gz");
    tocFile = TocUtil.getTocFile(journalFile);
    try (final RecordWriter writer = createWriter(journalFile, new StandardTocWriter(tocFile, true, false), true, 1024 * 32)) {
        writer.writeHeader(0L);
        for (int i = 0; i < 100_000; i++) {
            writer.writeRecord(createEvent());
        }
    }
    final long[] eventIds = new long[] { 4, 80, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 40_000, 80_000, 99_000 };
    // Never set to false: the loop runs until the JVM is stopped.
    boolean loopForever = true;
    while (loopForever) {
        final long start = System.nanoTime();
        for (int i = 0; i < 1000; i++) {
            // The TOC reader is declared as its own resource so it is closed on every iteration,
            // matching how the other tests in this class manage it.
            try (final InputStream in = new FileInputStream(journalFile);
                final StandardTocReader tocReader = new StandardTocReader(tocFile);
                final RecordReader reader = createReader(in, journalFile.getName(), tocReader, 32 * 1024)) {
                for (final long id : eventIds) {
                    // time(...) is a helper defined elsewhere in this class; presumably it
                    // measures the duration of the callable — verify against its definition.
                    time(() -> {
                        reader.skipToEvent(id);
                        return reader.nextRecord();
                    }, id);
                }
            }
        }
        final long ms = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
        System.out.println(ms + " ms total");
    }
}
Also used : StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) File(java.io.File) FileInputStream(java.io.FileInputStream) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

RecordReader (org.apache.nifi.provenance.serialization.RecordReader): 37, File (java.io.File): 30, Test (org.junit.Test): 18, FileInputStream (java.io.FileInputStream): 16, IOException (java.io.IOException): 16, RecordWriter (org.apache.nifi.provenance.serialization.RecordWriter): 16, TocReader (org.apache.nifi.provenance.toc.TocReader): 16, StandardTocReader (org.apache.nifi.provenance.toc.StandardTocReader): 15, StandardTocWriter (org.apache.nifi.provenance.toc.StandardTocWriter): 14, ArrayList (java.util.ArrayList): 13, HashMap (java.util.HashMap): 12, TocWriter (org.apache.nifi.provenance.toc.TocWriter): 12, InputStream (java.io.InputStream): 7, EOFException (java.io.EOFException): 6, AtomicLong (java.util.concurrent.atomic.AtomicLong): 6, Ignore (org.junit.Ignore): 6, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 5, DataOutputStream (java.io.DataOutputStream): 5, Path (java.nio.file.Path): 5, Callable (java.util.concurrent.Callable): 5