Search in sources :

Example 6 with TocReader

use of org.apache.nifi.provenance.toc.TocReader in project nifi by apache.

the class TestSchemaRecordReaderWriter method testFieldRemovedFromSchema.

/**
 * Writes two events using a schema derived from the V1 provenance schema with the
 * updated-attributes and previous-attributes fields removed, then reads the journal
 * back and checks that each event is still returned with an empty (non-null) map of
 * updated attributes.
 *
 * @throws IOException if writing or reading the journal / TOC files fails
 */
@Test
public void testFieldRemovedFromSchema() throws IOException {
    // try-with-resources closes the record writer first and the TOC writer second (the same
    // order as the previous nested finally blocks), but a failure in close() no longer masks
    // an exception thrown while writing: it is attached as a suppressed exception instead.
    try (final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false)) {
        // Create a schema that has the fields modified
        final RecordSchema schemaV1 = ProvenanceEventSchema.PROVENANCE_EVENT_SCHEMA_V1;
        final List<RecordField> fields = new ArrayList<>(schemaV1.getFields());
        fields.remove(new SimpleRecordField(EventFieldNames.UPDATED_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        fields.remove(new SimpleRecordField(EventFieldNames.PREVIOUS_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        final RecordSchema recordSchema = new RecordSchema(fields);

        // Create a record writer whose schema does not contain updated attributes or previous attributes.
        // This means that we must also override the method that writes out attributes so that we are able
        // to avoid actually writing them out.
        try (final ByteArraySchemaRecordWriter writer = new ByteArraySchemaRecordWriter(journalFile, idGenerator, tocWriter, false, 0) {

            @Override
            public void writeHeader(long firstEventId, DataOutputStream out) throws IOException {
                // Serialize the reduced schema, prefixed with its length in bytes.
                final ByteArrayOutputStream baos = new ByteArrayOutputStream();
                recordSchema.writeTo(baos);
                out.writeInt(baos.size());
                baos.writeTo(out);
            }

            @Override
            protected Record createRecord(final ProvenanceEventRecord event, final long eventId) {
                // Build the content-claim sub-schema from the reduced schema's own CONTENT_CLAIM field.
                final RecordSchema contentClaimSchema = new RecordSchema(recordSchema.getField(EventFieldNames.CONTENT_CLAIM).getSubFields());
                return new EventRecord(event, eventId, recordSchema, contentClaimSchema);
            }
        }) {
            writer.writeHeader(1L);
            writer.writeRecord(createEvent());
            writer.writeRecord(createEvent());
        }
    }

    // Read the records in and make sure that they have the info that we expect.
    try (final InputStream in = new FileInputStream(journalFile);
        final TocReader tocReader = new StandardTocReader(tocFile);
        final RecordReader reader = createReader(in, journalFile.getName(), tocReader, 10000)) {
        for (int i = 0; i < 2; i++) {
            final StandardProvenanceEventRecord event = reader.nextRecord();
            assertNotNull(event);
            assertEquals(ProvenanceEventType.RECEIVE, event.getEventType());
            // We will still have a Map<String, String> for updated attributes because the
            // Provenance Event Builder will create an empty map.
            assertNotNull(event.getUpdatedAttributes());
            assertTrue(event.getUpdatedAttributes().isEmpty());
        }
    }
}
Also used : TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordField(org.apache.nifi.repository.schema.RecordField) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) DataOutputStream(java.io.DataOutputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) FileInputStream(java.io.FileInputStream) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) NopTocWriter(org.apache.nifi.provenance.toc.NopTocWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) EventRecord(org.apache.nifi.provenance.schema.EventRecord) RecordSchema(org.apache.nifi.repository.schema.RecordSchema) Test(org.junit.Test)

Example 7 with TocReader

use of org.apache.nifi.provenance.toc.TocReader in project nifi by apache.

the class TestSchemaRecordReaderWriter method testAddOneRecordReadTwice.

/**
 * Writes exactly one event through a writer obtained from {@code createSchemaWriter}, configured
 * with one additional "Unit Test Field" (value "hello"), and then reads the journal twice: the
 * first read must return an event, the second must return {@code null}.
 *
 * @throws IOException if writing or reading the journal / TOC files fails
 */
@Test
public void testAddOneRecordReadTwice() throws IOException {
    final RecordField extraField = new SimpleRecordField("Unit Test Field", FieldType.STRING, Repetition.EXACTLY_ONE);

    // Value handed to createSchemaWriter for the extra field
    // (presumably written alongside each event -- confirm against createSchemaWriter).
    final Map<RecordField, Object> extraValues = new HashMap<>();
    extraValues.put(extraField, "hello");

    // Callback that appends the extra field to whatever field list it is given.
    final Consumer<List<RecordField>> appendExtraField = schemaFields -> schemaFields.add(extraField);

    try (final ByteArraySchemaRecordWriter schemaWriter = createSchemaWriter(appendExtraField, extraValues)) {
        schemaWriter.writeHeader(1L);
        schemaWriter.writeRecord(createEvent());
    }

    try (final InputStream journalIn = new FileInputStream(journalFile);
        final TocReader toc = new StandardTocReader(tocFile);
        final RecordReader recordReader = createReader(journalIn, journalFile.getName(), toc, 10000)) {
        // Only one event was written, so the first read succeeds ...
        assertNotNull(recordReader.nextRecord());
        // ... and the second read signals end-of-data with null.
        assertNull(recordReader.nextRecord());
    }
}
Also used : StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocReader(org.apache.nifi.provenance.toc.TocReader) Record(org.apache.nifi.repository.schema.Record) ByteArrayOutputStream(java.io.ByteArrayOutputStream) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) ArrayList(java.util.ArrayList) NopTocWriter(org.apache.nifi.provenance.toc.NopTocWriter) RecordSchema(org.apache.nifi.repository.schema.RecordSchema) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) TocWriter(org.apache.nifi.provenance.toc.TocWriter) FieldType(org.apache.nifi.repository.schema.FieldType) DataOutputStream(java.io.DataOutputStream) Map(java.util.Map) TocUtil(org.apache.nifi.provenance.toc.TocUtil) Repetition(org.apache.nifi.repository.schema.Repetition) Before(org.junit.Before) OutputStream(java.io.OutputStream) NullOutputStream(org.apache.nifi.stream.io.NullOutputStream) Assert.assertNotNull(org.junit.Assert.assertNotNull) EventFieldNames(org.apache.nifi.provenance.schema.EventFieldNames) RecordField(org.apache.nifi.repository.schema.RecordField) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Test(org.junit.Test) FileInputStream(java.io.FileInputStream) UUID(java.util.UUID) File(java.io.File) FieldMapRecord(org.apache.nifi.repository.schema.FieldMapRecord) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) Assert.assertNull(org.junit.Assert.assertNull) EventRecord(org.apache.nifi.provenance.schema.EventRecord) Ignore(org.junit.Ignore) Assert.assertFalse(org.junit.Assert.assertFalse) ProvenanceEventSchema(org.apache.nifi.provenance.schema.ProvenanceEventSchema) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) Assert(org.junit.Assert) 
Assert.assertEquals(org.junit.Assert.assertEquals) InputStream(java.io.InputStream) TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordField(org.apache.nifi.repository.schema.RecordField) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) HashMap(java.util.HashMap) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) FileInputStream(java.io.FileInputStream) SimpleRecordField(org.apache.nifi.repository.schema.SimpleRecordField) ArrayList(java.util.ArrayList) List(java.util.List) Test(org.junit.Test)

Example 8 with TocReader

use of org.apache.nifi.provenance.toc.TocReader in project nifi by apache.

the class DocsReader method getByteOffset.

/**
 * Determines the byte offset to seek to for the event described by the given index document.
 *
 * @param d      the index document describing the event
 * @param reader the reader whose TOC is consulted when the document carries a block index
 * @return the offset of the indexed block according to the reader's TOC when the document has a
 *         {@code BLOCK_INDEX} field; otherwise the value of its {@code STORAGE_FILE_OFFSET} field
 */
private long getByteOffset(final Document d, final RecordReader reader) {
    final IndexableField blockIndexField = d.getField(FieldNames.BLOCK_INDEX);

    if (blockIndexField == null) {
        // No block index stored in the document: use the stored file offset directly.
        return d.getField(FieldNames.STORAGE_FILE_OFFSET).numericValue().longValue();
    }

    // Translate the block index into a byte offset via the reader's table of contents.
    final int block = blockIndexField.numericValue().intValue();
    return reader.getTocReader().getBlockOffset(block);
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) TocReader(org.apache.nifi.provenance.toc.TocReader)

Example 9 with TocReader

use of org.apache.nifi.provenance.toc.TocReader in project nifi by apache.

the class RecordReaders method newRecordReader.

/**
 * Creates a new Record Reader that is capable of reading Provenance Event Journals
 * <p>
 * If the given file does not exist, the method first looks for a substitute in
 * {@code provenanceLogFiles} and then tries the {@code .prov.gz} / {@code .prov} variants of the
 * file name in the same directory. The concrete reader implementation is chosen from the
 * serialization name read from the start of the file.
 *
 * @param file               the Provenance Event Journal to read data from
 * @param provenanceLogFiles collection of all provenance journal files; may be {@code null}
 * @param maxAttributeChars  the maximum number of characters to retrieve for any one attribute. This allows us to avoid
 *                           issues where a FlowFile has an extremely large attribute and reading events
 *                           for that FlowFile results in loading that attribute into memory many times, exhausting the Java Heap
 * @return a Record Reader capable of reading Provenance Event Journals; an {@code EmptyRecordReader}
 *         if end-of-file is hit while reading the serialization name
 * @throws IOException if unable to create a Record Reader for the given file; this includes a
 *                     {@code FileNotFoundException} when no matching file can be opened or when a
 *                     required TOC file is missing, and an unknown or unsupported serialization name
 */
public static RecordReader newRecordReader(File file, final Collection<Path> provenanceLogFiles, final int maxAttributeChars) throws IOException {
    // Remember what the caller asked for so the error message refers to it even after 'file' is reassigned.
    final File originalFile = file;
    InputStream fis = null;
    try {
        if (!file.exists()) {
            if (provenanceLogFiles != null) {
                // Look for a known journal whose name starts with the same base name followed by "."
                // (presumably the part of the name before the first "." -- confirm LuceneUtil.substringBefore).
                final String baseName = LuceneUtil.substringBefore(file.getName(), ".") + ".";
                for (final Path path : provenanceLogFiles) {
                    if (path.toFile().getName().startsWith(baseName)) {
                        file = path.toFile();
                        break;
                    }
                }
            }
        }
        if (file.exists()) {
            try {
                fis = new FileInputStream(file);
            } catch (final FileNotFoundException fnfe) {
                // The file vanished between exists() and open; fall through to the retry loop below.
                fis = null;
            }
        }
        String filename = file.getName();
        openStream: while (fis == null) {
            final File dir = file.getParentFile();
            final String baseName = LuceneUtil.substringBefore(file.getName(), ".prov");
            // The journal may exist under either of two names, so try the compressed name (".prov.gz")
            // first and the uncompressed name (".prov") second. Presumably the compressed form is the
            // common case because the file has usually finished compressing by the time that we are
            // querying the data.
            for (final String extension : new String[] { ".prov.gz", ".prov" }) {
                file = new File(dir, baseName + extension);
                if (file.exists()) {
                    try {
                        fis = new FileInputStream(file);
                        filename = baseName + extension;
                        break openStream;
                    } catch (final FileNotFoundException fnfe) {
                        // file was modified by a RolloverAction after we verified that it exists but before we could
                        // create an InputStream for it. Start over.
                        fis = null;
                        continue openStream;
                    }
                }
            }
            // Neither candidate exists: leave the loop with fis still null so the check below reports it.
            break;
        }
        if (fis == null) {
            throw new FileNotFoundException("Unable to locate file " + originalFile);
        }
        final File tocFile = TocUtil.getTocFile(file);
        final InputStream bufferedInStream = new BufferedInputStream(fis);
        final String serializationName;
        try {
            // Peek at the serialization name and then rewind, so that the reader created below
            // receives the buffered stream positioned at the start of the file again.
            bufferedInStream.mark(4096);
            // The GZIP wrapper is used only for this peek; the readers below are handed the
            // un-wrapped bufferedInStream together with the filename.
            final InputStream in = filename.endsWith(".gz") ? new GZIPInputStream(bufferedInStream) : bufferedInStream;
            final DataInputStream dis = new DataInputStream(in);
            serializationName = dis.readUTF();
            bufferedInStream.reset();
        } catch (final EOFException eof) {
            // Ran out of data before a serialization name could be read: close the stream
            // and report the journal as empty rather than failing.
            fis.close();
            return new EmptyRecordReader();
        }
        // NOTE(review): in each case below, if the reader constructor throws after the StandardTocReader
        // has been created, nothing visible here closes that TOC reader -- confirm whether it holds a resource.
        switch(serializationName) {
            case StandardRecordReader.SERIALIZATION_NAME:
                {
                    // A TOC is optional for this format: pass one only if the TOC file exists.
                    if (tocFile.exists()) {
                        final TocReader tocReader = new StandardTocReader(tocFile);
                        return new StandardRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars);
                    } else {
                        return new StandardRecordReader(bufferedInStream, filename, maxAttributeChars);
                    }
                }
            case ByteArraySchemaRecordWriter.SERIALIZATION_NAME:
                {
                    // A TOC is optional for this format as well.
                    if (tocFile.exists()) {
                        final TocReader tocReader = new StandardTocReader(tocFile);
                        return new ByteArraySchemaRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars);
                    } else {
                        return new ByteArraySchemaRecordReader(bufferedInStream, filename, maxAttributeChars);
                    }
                }
            case EventIdFirstSchemaRecordWriter.SERIALIZATION_NAME:
                {
                    // This format is only read with a TOC; a missing TOC file is an error.
                    if (!tocFile.exists()) {
                        throw new FileNotFoundException("Cannot create TOC Reader because the file " + tocFile + " does not exist");
                    }
                    final TocReader tocReader = new StandardTocReader(tocFile);
                    return new EventIdFirstSchemaRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars);
                }
            case EncryptedSchemaRecordReader.SERIALIZATION_NAME:
                {
                    // Encrypted journals need both a TOC file and encryption support to be available.
                    if (!tocFile.exists()) {
                        throw new FileNotFoundException("Cannot create TOC Reader because the file " + tocFile + " does not exist");
                    }
                    if (!isEncryptionAvailable()) {
                        throw new IOException("Cannot read encrypted repository because this reader is not configured for encryption");
                    }
                    final TocReader tocReader = new StandardTocReader(tocFile);
                    // Return a reader with no eventEncryptor because this method contract cannot change, then inject the encryptor from the writer in the calling method
                    return new EncryptedSchemaRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars, null);
                }
            default:
                {
                    throw new IOException("Unable to read data from file " + file + " because the file was written using an unknown Serializer: " + serializationName);
                }
        }
    } catch (final IOException ioe) {
        // Do not leak the file handle on failure; a failure to close is recorded on the original
        // exception as a suppressed exception rather than replacing it.
        // NOTE(review): only IOException is handled here, so an unchecked exception would leave fis open.
        if (fis != null) {
            try {
                fis.close();
            } catch (final IOException inner) {
                ioe.addSuppressed(inner);
            }
        }
        throw ioe;
    }
}
Also used : Path(java.nio.file.Path) TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) EncryptedSchemaRecordReader(org.apache.nifi.provenance.EncryptedSchemaRecordReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) StandardRecordReader(org.apache.nifi.provenance.StandardRecordReader) DataInputStream(java.io.DataInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedInputStream(java.io.BufferedInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) FileInputStream(java.io.FileInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedInputStream(java.io.BufferedInputStream) ByteArraySchemaRecordReader(org.apache.nifi.provenance.ByteArraySchemaRecordReader) EOFException(java.io.EOFException) EventIdFirstSchemaRecordReader(org.apache.nifi.provenance.EventIdFirstSchemaRecordReader) File(java.io.File)

Example 10 with TocReader

use of org.apache.nifi.provenance.toc.TocReader in project nifi-minifi by apache.

the class MiNiFiPersistentProvenanceRepository method getEvents.

/**
 * Collects up to {@code maxRecords} events whose id is at least {@code firstRecordId} and that the
 * given user is authorized to see, reading the journal files returned by {@code getPathsForId}
 * in order.
 * <p>
 * A journal that is missing or ends unexpectedly is skipped silently; any other I/O failure is
 * logged and reported through the event reporter, after which reading continues with the next file.
 *
 * @param firstRecordId the smallest event id to return
 * @param maxRecords    the maximum number of events to return
 * @param user          the user on whose behalf events are retrieved
 * @return the matching events, possibly empty, never {@code null}
 * @throws IOException declared by the signature
 */
@Override
public List<ProvenanceEventRecord> getEvents(final long firstRecordId, final int maxRecords, final NiFiUser user) throws IOException {
    final List<ProvenanceEventRecord> events = new ArrayList<>(maxRecords);

    final List<Path> journalPaths = getPathsForId(firstRecordId);
    if (journalPaths == null || journalPaths.isEmpty()) {
        return events;
    }

    for (final Path path : journalPaths) {
        try (RecordReader reader = RecordReaders.newRecordReader(path.toFile(), getAllLogFiles(), maxAttributeChars)) {
            // While nothing has been collected yet, use the TOC (when the reader has one) to jump
            // straight to the block that holds the first requested event id. This avoids reading
            // through data we do not care about just to get to the first record that we want.
            final TocReader toc = events.isEmpty() ? reader.getTocReader() : null;
            if (toc != null) {
                final Integer startBlock = toc.getBlockIndexForEventId(firstRecordId);
                if (startBlock != null) {
                    reader.skipToBlock(startBlock);
                }
            }

            while (events.size() < maxRecords) {
                final StandardProvenanceEventRecord event = reader.nextRecord();
                if (event == null) {
                    // end of this journal
                    break;
                }
                if (event.getEventId() < firstRecordId || !isAuthorized(event, user)) {
                    // before the requested range, or not visible to this user
                    continue;
                }
                events.add(event);
            }
        } catch (final EOFException | FileNotFoundException ignored) {
            // assume file aged off (or there's no data in file, in case of EOFException, which indicates that data was cached
            // in operating system and entire O/S crashed and always.sync was not turned on.)
        } catch (final IOException ioe) {
            logger.error("Failed to read Provenance Event File {} due to {}", path.toFile(), ioe.toString());
            logger.error("", ioe);
            eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to read Provenance Event File " + path.toFile() + " due to " + ioe.toString());
        }

        // Stop visiting further journals once the requested number of events has been gathered.
        if (events.size() >= maxRecords) {
            break;
        }
    }

    if (logger.isDebugEnabled()) {
        logger.debug("Retrieving up to {} records starting at Event ID {}; returning {} events", maxRecords, firstRecordId, events.size());
    }
    return events;
}
Also used : Path(java.nio.file.Path) TocReader(org.apache.nifi.provenance.toc.TocReader) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) EOFException(java.io.EOFException)

Aggregations

TocReader (org.apache.nifi.provenance.toc.TocReader)20 RecordReader (org.apache.nifi.provenance.serialization.RecordReader)16 StandardTocReader (org.apache.nifi.provenance.toc.StandardTocReader)16 Test (org.junit.Test)16 File (java.io.File)14 FileInputStream (java.io.FileInputStream)14 RecordWriter (org.apache.nifi.provenance.serialization.RecordWriter)14 StandardTocWriter (org.apache.nifi.provenance.toc.StandardTocWriter)14 TocWriter (org.apache.nifi.provenance.toc.TocWriter)14 HashMap (java.util.HashMap)8 InputStream (java.io.InputStream)6 ArrayList (java.util.ArrayList)6 ByteArrayOutputStream (java.io.ByteArrayOutputStream)5 DataOutputStream (java.io.DataOutputStream)5 IOException (java.io.IOException)5 EOFException (java.io.EOFException)3 FileNotFoundException (java.io.FileNotFoundException)3 Path (java.nio.file.Path)3 EventRecord (org.apache.nifi.provenance.schema.EventRecord)3 NopTocWriter (org.apache.nifi.provenance.toc.NopTocWriter)3