Search in sources:

Example 6 with StandardTocReader

use of org.apache.nifi.provenance.toc.StandardTocReader in project nifi by apache.

From the class TestEventIdFirstSchemaRecordReaderWriter, the method testContentClaimRemoved.

@Test
public void testContentClaimRemoved() throws IOException {
    // Journal and table-of-contents files in a unique per-run directory.
    final File journal = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite.gz");
    final File toc = TocUtil.getTocFile(journal);
    final TocWriter tocWriter = new StandardTocWriter(toc, false, false);
    final RecordWriter writer = createWriter(journal, tocWriter, true, 8192);

    final Map<String, String> flowFileAttributes = new HashMap<>();
    flowFileAttributes.put("filename", "1.txt");
    flowFileAttributes.put("uuid", UUID.randomUUID().toString());

    // Build a RECEIVE event whose previous content claim is fully populated but
    // whose current content claim has been cleared (all-null claim fields).
    final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventTime(System.currentTimeMillis());
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    builder.fromFlowFile(TestUtil.createFlowFile(3L, 3000L, flowFileAttributes));
    builder.setComponentId("1234");
    builder.setComponentType("dummy processor");
    builder.setPreviousContentClaim("container-1", "section-1", "identifier-1", 1L, 1L);
    builder.setCurrentContentClaim(null, null, null, 0L, 0L);

    writer.writeHeader(1L);
    writer.writeRecord(builder.build());
    writer.close();

    // Read the single event back and verify that the previous claim round-trips
    // while the removed current claim stays absent.
    final TocReader tocReader = new StandardTocReader(toc);
    try (final FileInputStream fis = new FileInputStream(journal);
        final RecordReader reader = createReader(fis, journal.getName(), tocReader, 2048)) {
        assertEquals(0, reader.getBlockIndex());
        reader.skipToBlock(0);

        final StandardProvenanceEventRecord recovered = reader.nextRecord();
        assertNotNull(recovered);
        assertEquals("nifi://unit-test", recovered.getTransitUri());
        assertEquals("container-1", recovered.getPreviousContentClaimContainer());
        assertNull(recovered.getContentClaimContainer());
        assertEquals("section-1", recovered.getPreviousContentClaimSection());
        assertNull(recovered.getContentClaimSection());
        assertEquals("identifier-1", recovered.getPreviousContentClaimIdentifier());
        assertNull(recovered.getContentClaimIdentifier());
        assertEquals(1L, recovered.getPreviousContentClaimOffset().longValue());
        assertNull(recovered.getContentClaimOffset());
        assertEquals(1L, recovered.getPreviousFileSize().longValue());
        assertEquals(0L, recovered.getFileSize());
        // Exactly one record was written, so the next read must report end-of-stream.
        assertNull(reader.nextRecord());
    }
    FileUtils.deleteFile(journal.getParentFile(), true);
}
Also used : TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) HashMap(java.util.HashMap) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) FileInputStream(java.io.FileInputStream) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) File(java.io.File) Test(org.junit.Test)

Example 7 with StandardTocReader

use of org.apache.nifi.provenance.toc.StandardTocReader in project nifi by apache.

From the class TestSchemaRecordReaderWriter, the method testFieldRemovedFromSchema.

@Test
public void testFieldRemovedFromSchema() throws IOException {
    // Verifies that records written with a reduced schema (two fields removed from
    // the V1 provenance-event schema) can still be read back, and that the reader
    // supplies an empty attribute map for the missing UPDATED_ATTRIBUTES field.
    final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);
    try {
        // Create a schema that has the fields modified
        final RecordSchema schemaV1 = ProvenanceEventSchema.PROVENANCE_EVENT_SCHEMA_V1;
        final List<RecordField> fields = new ArrayList<>(schemaV1.getFields());
        fields.remove(new SimpleRecordField(EventFieldNames.UPDATED_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        fields.remove(new SimpleRecordField(EventFieldNames.PREVIOUS_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        final RecordSchema recordSchema = new RecordSchema(fields);
        // Create a record writer whose schema does not contain updated attributes or previous attributes.
        // This means that we must also override the method that writes out attributes so that we are able
        // to avoid actually writing them out.
        final ByteArraySchemaRecordWriter writer = new ByteArraySchemaRecordWriter(journalFile, idGenerator, tocWriter, false, 0) {

            @Override
            public void writeHeader(long firstEventId, DataOutputStream out) throws IOException {
                // Serialize the reduced schema (length-prefixed) in place of the default V1 schema.
                final ByteArrayOutputStream baos = new ByteArrayOutputStream();
                recordSchema.writeTo(baos);
                out.writeInt(baos.size());
                baos.writeTo(out);
            }

            @Override
            protected Record createRecord(final ProvenanceEventRecord event, final long eventId) {
                // Build each record against the reduced schema so the removed fields are never serialized.
                final RecordSchema contentClaimSchema = new RecordSchema(recordSchema.getField(EventFieldNames.CONTENT_CLAIM).getSubFields());
                return new EventRecord(event, eventId, recordSchema, contentClaimSchema);
            }
        };
        try {
            writer.writeHeader(1L);
            writer.writeRecord(createEvent());
            writer.writeRecord(createEvent());
        } finally {
            writer.close();
        }
    } finally {
        // Close the TOC writer even if writing the journal failed.
        tocWriter.close();
    }
    // Read the records in and make sure that they have the info that we expect.
    try (final InputStream in = new FileInputStream(journalFile);
        final TocReader tocReader = new StandardTocReader(tocFile);
        final RecordReader reader = createReader(in, journalFile.getName(), tocReader, 10000)) {
        for (int i = 0; i < 2; i++) {
            final StandardProvenanceEventRecord event = reader.nextRecord();
            assertNotNull(event);
            assertEquals(ProvenanceEventType.RECEIVE, event.getEventType());
            // We will still have a Map<String, String> for updated attributes because the
            // Provenance Event Builder will create an empty map.
            assertNotNull(event.getUpdatedAttributes());
            assertTrue(event.getUpdatedAttributes().isEmpty());
        }
    }
}
Also used : TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordField(org.apache.nifi.repository.schema.RecordField) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) DataOutputStream(java.io.DataOutputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) FileInputStream(java.io.FileInputStream) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) NopTocWriter(org.apache.nifi.provenance.toc.NopTocWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) EventRecord(org.apache.nifi.provenance.schema.EventRecord) RecordSchema(org.apache.nifi.repository.schema.RecordSchema) Test(org.junit.Test)

Example 8 with StandardTocReader

use of org.apache.nifi.provenance.toc.StandardTocReader in project nifi by apache.

From the class TestSchemaRecordReaderWriter, the method testAddOneRecordReadTwice.

@Test
public void testAddOneRecordReadTwice() throws IOException {
    // Extend the schema with one extra required String field and supply its value.
    final RecordField extraField = new SimpleRecordField("Unit Test Field", FieldType.STRING, Repetition.EXACTLY_ONE);
    final Map<RecordField, Object> extraValues = new HashMap<>();
    extraValues.put(extraField, "hello");

    // Write exactly one event using the modified schema.
    try (final ByteArraySchemaRecordWriter writer = createSchemaWriter(fields -> fields.add(extraField), extraValues)) {
        writer.writeHeader(1L);
        writer.writeRecord(createEvent());
    }

    // The first read must yield the event; a second read must report end-of-stream.
    try (final InputStream in = new FileInputStream(journalFile);
        final TocReader tocReader = new StandardTocReader(tocFile);
        final RecordReader reader = createReader(in, journalFile.getName(), tocReader, 10000)) {
        assertNotNull(reader.nextRecord());
        assertNull(reader.nextRecord());
    }
}
Also used : StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocReader(org.apache.nifi.provenance.toc.TocReader) Record(org.apache.nifi.repository.schema.Record) ByteArrayOutputStream(java.io.ByteArrayOutputStream) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) ArrayList(java.util.ArrayList) NopTocWriter(org.apache.nifi.provenance.toc.NopTocWriter) RecordSchema(org.apache.nifi.repository.schema.RecordSchema) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) TocWriter(org.apache.nifi.provenance.toc.TocWriter) FieldType(org.apache.nifi.repository.schema.FieldType) DataOutputStream(java.io.DataOutputStream) Map(java.util.Map) TocUtil(org.apache.nifi.provenance.toc.TocUtil) Repetition(org.apache.nifi.repository.schema.Repetition) Before(org.junit.Before) OutputStream(java.io.OutputStream) NullOutputStream(org.apache.nifi.stream.io.NullOutputStream) Assert.assertNotNull(org.junit.Assert.assertNotNull) EventFieldNames(org.apache.nifi.provenance.schema.EventFieldNames) RecordField(org.apache.nifi.repository.schema.RecordField) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Test(org.junit.Test) FileInputStream(java.io.FileInputStream) UUID(java.util.UUID) File(java.io.File) FieldMapRecord(org.apache.nifi.repository.schema.FieldMapRecord) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) Assert.assertNull(org.junit.Assert.assertNull) EventRecord(org.apache.nifi.provenance.schema.EventRecord) Ignore(org.junit.Ignore) Assert.assertFalse(org.junit.Assert.assertFalse) ProvenanceEventSchema(org.apache.nifi.provenance.schema.ProvenanceEventSchema) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) Assert(org.junit.Assert) 
Assert.assertEquals(org.junit.Assert.assertEquals) InputStream(java.io.InputStream) TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordField(org.apache.nifi.repository.schema.RecordField) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) HashMap(java.util.HashMap) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) FileInputStream(java.io.FileInputStream) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) ArrayList(java.util.ArrayList) List(java.util.List) Test(org.junit.Test)

Example 9 with StandardTocReader

use of org.apache.nifi.provenance.toc.StandardTocReader in project nifi by apache.

From the class TestSchemaRecordReaderWriter, the method testPerformanceOfRandomAccessReads.

@Test
@Ignore("runs forever for performance analysis/profiling")
public void testPerformanceOfRandomAccessReads() throws Exception {
    journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testPerformanceOfRandomAccessReads.gz");
    tocFile = TocUtil.getTocFile(journalFile);

    // Populate a compressed journal with 100,000 events, using 32 KB blocks.
    try (final RecordWriter writer = createWriter(journalFile, new StandardTocWriter(tocFile, true, false), true, 1024 * 32)) {
        writer.writeHeader(0L);
        for (int i = 0; i < 100_000; i++) {
            writer.writeRecord(createEvent());
        }
    }

    // Event IDs scattered through the journal, including a run of adjacent IDs.
    final long[] idsToSeek = new long[] { 4, 80, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 40_000, 80_000, 99_000 };

    // Loop indefinitely so a profiler can be attached; each pass re-opens the
    // journal 1000 times and performs the random-access reads.
    while (true) {
        final long startNanos = System.nanoTime();
        for (int i = 0; i < 1000; i++) {
            try (final InputStream in = new FileInputStream(journalFile);
                final RecordReader reader = createReader(in, journalFile.getName(), new StandardTocReader(tocFile), 32 * 1024)) {
                for (final long id : idsToSeek) {
                    time(() -> {
                        reader.skipToEvent(id);
                        return reader.nextRecord();
                    }, id);
                }
            }
        }
        final long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
        System.out.println(elapsedMillis + " ms total");
    }
}
Also used : StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) File(java.io.File) FileInputStream(java.io.FileInputStream) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 10 with StandardTocReader

use of org.apache.nifi.provenance.toc.StandardTocReader in project nifi by apache.

From the class RecordReaders, the method newRecordReader.

/**
 * Creates a new Record Reader that is capable of reading Provenance Event Journals
 *
 * @param file               the Provenance Event Journal to read data from
 * @param provenanceLogFiles collection of all provenance journal files
 * @param maxAttributeChars  the maximum number of characters to retrieve for any one attribute. This allows us to avoid
 *                           issues where a FlowFile has an extremely large attribute and reading events
 *                           for that FlowFile results in loading that attribute into memory many times, exhausting the Java Heap
 * @return a Record Reader capable of reading Provenance Event Journals
 * @throws IOException if unable to create a Record Reader for the given file
 */
public static RecordReader newRecordReader(File file, final Collection<Path> provenanceLogFiles, final int maxAttributeChars) throws IOException {
    final File originalFile = file;
    InputStream fis = null;
    try {
        // If the requested file no longer exists, look through the known journal files
        // for one with the same base name (the file may have been renamed, e.g. on rollover).
        if (!file.exists()) {
            if (provenanceLogFiles != null) {
                final String baseName = LuceneUtil.substringBefore(file.getName(), ".") + ".";
                for (final Path path : provenanceLogFiles) {
                    if (path.toFile().getName().startsWith(baseName)) {
                        file = path.toFile();
                        break;
                    }
                }
            }
        }
        if (file.exists()) {
            try {
                fis = new FileInputStream(file);
            } catch (final FileNotFoundException fnfe) {
                // File vanished between exists() and open; fall through to the retry loop below.
                fis = null;
            }
        }
        String filename = file.getName();
        openStream: while (fis == null) {
            final File dir = file.getParentFile();
            final String baseName = LuceneUtil.substringBefore(file.getName(), ".prov");
            // The journal may have been compressed (".prov" -> ".prov.gz") by the time that
            // we are querying the data, so try both extensions.
            for (final String extension : new String[] { ".prov.gz", ".prov" }) {
                file = new File(dir, baseName + extension);
                if (file.exists()) {
                    try {
                        fis = new FileInputStream(file);
                        filename = baseName + extension;
                        break openStream;
                    } catch (final FileNotFoundException fnfe) {
                        // file was modified by a RolloverAction after we verified that it exists but before we could
                        // create an InputStream for it. Start over.
                        fis = null;
                        continue openStream;
                    }
                }
            }
            break;
        }
        if (fis == null) {
            throw new FileNotFoundException("Unable to locate file " + originalFile);
        }
        final File tocFile = TocUtil.getTocFile(file);
        final InputStream bufferedInStream = new BufferedInputStream(fis);
        final String serializationName;
        try {
            // Peek at the serialization name written in the journal header, then reset so
            // the chosen reader sees the stream from the beginning.
            bufferedInStream.mark(4096);
            final InputStream in = filename.endsWith(".gz") ? new GZIPInputStream(bufferedInStream) : bufferedInStream;
            final DataInputStream dis = new DataInputStream(in);
            serializationName = dis.readUTF();
            bufferedInStream.reset();
        } catch (final EOFException eof) {
            // An empty or truncated header means no events were ever written to this journal.
            fis.close();
            return new EmptyRecordReader();
        }
        // Dispatch on the serialization format recorded in the journal header. The two
        // legacy formats tolerate a missing TOC file; the newer formats require it.
        switch(serializationName) {
            case StandardRecordReader.SERIALIZATION_NAME:
                {
                    if (tocFile.exists()) {
                        final TocReader tocReader = new StandardTocReader(tocFile);
                        return new StandardRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars);
                    } else {
                        return new StandardRecordReader(bufferedInStream, filename, maxAttributeChars);
                    }
                }
            case ByteArraySchemaRecordWriter.SERIALIZATION_NAME:
                {
                    if (tocFile.exists()) {
                        final TocReader tocReader = new StandardTocReader(tocFile);
                        return new ByteArraySchemaRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars);
                    } else {
                        return new ByteArraySchemaRecordReader(bufferedInStream, filename, maxAttributeChars);
                    }
                }
            case EventIdFirstSchemaRecordWriter.SERIALIZATION_NAME:
                {
                    if (!tocFile.exists()) {
                        throw new FileNotFoundException("Cannot create TOC Reader because the file " + tocFile + " does not exist");
                    }
                    final TocReader tocReader = new StandardTocReader(tocFile);
                    return new EventIdFirstSchemaRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars);
                }
            case EncryptedSchemaRecordReader.SERIALIZATION_NAME:
                {
                    if (!tocFile.exists()) {
                        throw new FileNotFoundException("Cannot create TOC Reader because the file " + tocFile + " does not exist");
                    }
                    if (!isEncryptionAvailable()) {
                        throw new IOException("Cannot read encrypted repository because this reader is not configured for encryption");
                    }
                    final TocReader tocReader = new StandardTocReader(tocFile);
                    // Return a reader with no eventEncryptor because this method contract cannot change, then inject the encryptor from the writer in the calling method
                    return new EncryptedSchemaRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars, null);
                }
            default:
                {
                    throw new IOException("Unable to read data from file " + file + " because the file was written using an unknown Serializer: " + serializationName);
                }
        }
    } catch (final IOException ioe) {
        // Do not leak the open file handle on failure; suppress any secondary close error.
        if (fis != null) {
            try {
                fis.close();
            } catch (final IOException inner) {
                ioe.addSuppressed(inner);
            }
        }
        throw ioe;
    }
}
Also used : Path(java.nio.file.Path) TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) EncryptedSchemaRecordReader(org.apache.nifi.provenance.EncryptedSchemaRecordReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) StandardRecordReader(org.apache.nifi.provenance.StandardRecordReader) DataInputStream(java.io.DataInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedInputStream(java.io.BufferedInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) FileInputStream(java.io.FileInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedInputStream(java.io.BufferedInputStream) ByteArraySchemaRecordReader(org.apache.nifi.provenance.ByteArraySchemaRecordReader) EOFException(java.io.EOFException) EventIdFirstSchemaRecordReader(org.apache.nifi.provenance.EventIdFirstSchemaRecordReader) File(java.io.File)

Aggregations

StandardTocReader (org.apache.nifi.provenance.toc.StandardTocReader)18 File (java.io.File)17 StandardTocWriter (org.apache.nifi.provenance.toc.StandardTocWriter)17 Test (org.junit.Test)16 FileInputStream (java.io.FileInputStream)15 RecordWriter (org.apache.nifi.provenance.serialization.RecordWriter)15 TocReader (org.apache.nifi.provenance.toc.TocReader)15 RecordReader (org.apache.nifi.provenance.serialization.RecordReader)14 TocWriter (org.apache.nifi.provenance.toc.TocWriter)14 HashMap (java.util.HashMap)8 InputStream (java.io.InputStream)6 IOException (java.io.IOException)4 ArrayList (java.util.ArrayList)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 DataOutputStream (java.io.DataOutputStream)3 EventRecord (org.apache.nifi.provenance.schema.EventRecord)3 NopTocWriter (org.apache.nifi.provenance.toc.NopTocWriter)3 RecordField (org.apache.nifi.repository.schema.RecordField)3 RecordSchema (org.apache.nifi.repository.schema.RecordSchema)3 SimpleRecordField (org.apache.nifi.repository.schema.SimpleRecordField)3