Search in sources :

Example 1 with StandardRecordReader

Use of org.apache.nifi.provenance.StandardRecordReader in the Apache NiFi project.

The newRecordReader method of the RecordReaders class.

/**
 * Creates a new Record Reader that is capable of reading Provenance Event Journals.
 *
 * <p>The journal is located in this order:
 * <ol>
 *   <li>the given {@code file}, if it exists;</li>
 *   <li>otherwise the first entry of {@code provenanceLogFiles} whose file name starts with the
 *       given file's base name followed by a dot;</li>
 *   <li>otherwise (or if opening failed) a sibling file with the same base name and the extension
 *       {@code .prov.gz} or {@code .prov}, tried in that order.</li>
 * </ol>
 * The serialization name stored at the start of the journal is then read to select the concrete
 * reader implementation.
 *
 * @param file               the Provenance Event Journal to read data from
 * @param provenanceLogFiles collection of all provenance journal files; may be {@code null}, in which
 *                           case no lookup by base name is performed
 * @param maxAttributeChars  the maximum number of characters to retrieve for any one attribute. This allows us to avoid
 *                           issues where a FlowFile has an extremely large attribute and reading events
 *                           for that FlowFile results in loading that attribute into memory many times, exhausting the Java Heap
 * @return a Record Reader capable of reading Provenance Event Journals; an {@code EmptyRecordReader} if the
 *         journal ends before its serialization name could be read
 * @throws FileNotFoundException if no readable journal could be located, or if the serialization format
 *                               requires a TOC file and that file does not exist
 * @throws IOException if unable to create a Record Reader for the given file, including when the journal was
 *                     written with an unknown serializer or is encrypted while encryption is not available
 */
public static RecordReader newRecordReader(File file, final Collection<Path> provenanceLogFiles, final int maxAttributeChars) throws IOException {
    // Upper bound on how often the candidate scan is restarted after a file that exists could not be opened.
    // FileInputStream also throws FileNotFoundException for directories and unreadable files, so without
    // a bound a persistently unopenable file would make this method spin forever.
    final int maxOpenAttempts = 10;

    final File originalFile = file;
    InputStream fis = null;
    try {
        if (!file.exists()) {
            if (provenanceLogFiles != null) {
                // Fall back to any known journal that shares the base name (e.g. a differently-suffixed variant).
                final String baseName = LuceneUtil.substringBefore(file.getName(), ".") + ".";
                for (final Path path : provenanceLogFiles) {
                    if (path.toFile().getName().startsWith(baseName)) {
                        file = path.toFile();
                        break;
                    }
                }
            }
        }

        FileNotFoundException lastOpenFailure = null;
        if (file.exists()) {
            try {
                fis = new FileInputStream(file);
            } catch (final FileNotFoundException fnfe) {
                // The file could not be opened even though it was just seen; fall through to the
                // extension-based search below.
                lastOpenFailure = fnfe;
            }
        }

        String filename = file.getName();
        int failedOpenAttempts = 0;
        openStream: while (fis == null) {
            final File dir = file.getParentFile();
            final String baseName = LuceneUtil.substringBefore(file.getName(), ".prov");
            // Try the compressed journal first and the uncompressed one second; presumably the journal
            // has usually already been compressed by the time that we are querying the data.
            for (final String extension : new String[] { ".prov.gz", ".prov" }) {
                file = new File(dir, baseName + extension);
                if (file.exists()) {
                    try {
                        fis = new FileInputStream(file);
                        filename = baseName + extension;
                        break openStream;
                    } catch (final FileNotFoundException fnfe) {
                        // file was modified by a RolloverAction after we verified that it exists but before we could
                        // create an InputStream for it. Start over, but only a bounded number of times.
                        lastOpenFailure = fnfe;
                        failedOpenAttempts++;
                        if (failedOpenAttempts >= maxOpenAttempts) {
                            break openStream;
                        }
                        continue openStream;
                    }
                }
            }
            // Neither candidate exists; give up.
            break;
        }

        if (fis == null) {
            final FileNotFoundException notFound = new FileNotFoundException("Unable to locate file " + originalFile);
            if (lastOpenFailure != null) {
                // Preserve the underlying reason (e.g. permission denied) for diagnostics.
                notFound.addSuppressed(lastOpenFailure);
            }
            throw notFound;
        }

        final File tocFile = TocUtil.getTocFile(file);
        final InputStream bufferedInStream = new BufferedInputStream(fis);
        final String serializationName;
        try {
            // Peek at the serialization name, then rewind so that the chosen reader sees the stream from the start.
            bufferedInStream.mark(4096);
            final InputStream in = filename.endsWith(".gz") ? new GZIPInputStream(bufferedInStream) : bufferedInStream;
            final DataInputStream dis = new DataInputStream(in);
            serializationName = dis.readUTF();
            bufferedInStream.reset();
        } catch (final EOFException eof) {
            // The journal ended before a serialization name could be read: treat it as containing no events.
            fis.close();
            return new EmptyRecordReader();
        }

        switch (serializationName) {
            case StandardRecordReader.SERIALIZATION_NAME:
                {
                    // The TOC is optional for this format.
                    if (tocFile.exists()) {
                        final TocReader tocReader = new StandardTocReader(tocFile);
                        return new StandardRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars);
                    } else {
                        return new StandardRecordReader(bufferedInStream, filename, maxAttributeChars);
                    }
                }
            case ByteArraySchemaRecordWriter.SERIALIZATION_NAME:
                {
                    // The TOC is optional for this format.
                    if (tocFile.exists()) {
                        final TocReader tocReader = new StandardTocReader(tocFile);
                        return new ByteArraySchemaRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars);
                    } else {
                        return new ByteArraySchemaRecordReader(bufferedInStream, filename, maxAttributeChars);
                    }
                }
            case EventIdFirstSchemaRecordWriter.SERIALIZATION_NAME:
                {
                    // The TOC is mandatory for this format.
                    if (!tocFile.exists()) {
                        throw new FileNotFoundException("Cannot create TOC Reader because the file " + tocFile + " does not exist");
                    }
                    final TocReader tocReader = new StandardTocReader(tocFile);
                    return new EventIdFirstSchemaRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars);
                }
            case EncryptedSchemaRecordReader.SERIALIZATION_NAME:
                {
                    // The TOC is mandatory for this format.
                    if (!tocFile.exists()) {
                        throw new FileNotFoundException("Cannot create TOC Reader because the file " + tocFile + " does not exist");
                    }
                    if (!isEncryptionAvailable()) {
                        throw new IOException("Cannot read encrypted repository because this reader is not configured for encryption");
                    }
                    final TocReader tocReader = new StandardTocReader(tocFile);
                    // Return a reader with no eventEncryptor because this method contract cannot change, then inject the encryptor from the writer in the calling method
                    return new EncryptedSchemaRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars, null);
                }
            default:
                {
                    throw new IOException("Unable to read data from file " + file + " because the file was written using an unknown Serializer: " + serializationName);
                }
        }
    } catch (final IOException | RuntimeException e) {
        // Release the file handle on any failure, checked or unchecked; on success the returned reader
        // is handed the stream instead.
        if (fis != null) {
            try {
                fis.close();
            } catch (final IOException inner) {
                e.addSuppressed(inner);
            }
        }
        throw e;
    }
}
Also used : Path(java.nio.file.Path) TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) EncryptedSchemaRecordReader(org.apache.nifi.provenance.EncryptedSchemaRecordReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) StandardRecordReader(org.apache.nifi.provenance.StandardRecordReader) DataInputStream(java.io.DataInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedInputStream(java.io.BufferedInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) FileInputStream(java.io.FileInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedInputStream(java.io.BufferedInputStream) ByteArraySchemaRecordReader(org.apache.nifi.provenance.ByteArraySchemaRecordReader) EOFException(java.io.EOFException) EventIdFirstSchemaRecordReader(org.apache.nifi.provenance.EventIdFirstSchemaRecordReader) File(java.io.File)

Aggregations

BufferedInputStream (java.io.BufferedInputStream)1 DataInputStream (java.io.DataInputStream)1 EOFException (java.io.EOFException)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 Path (java.nio.file.Path)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 ByteArraySchemaRecordReader (org.apache.nifi.provenance.ByteArraySchemaRecordReader)1 EncryptedSchemaRecordReader (org.apache.nifi.provenance.EncryptedSchemaRecordReader)1 EventIdFirstSchemaRecordReader (org.apache.nifi.provenance.EventIdFirstSchemaRecordReader)1 StandardRecordReader (org.apache.nifi.provenance.StandardRecordReader)1 StandardTocReader (org.apache.nifi.provenance.toc.StandardTocReader)1 TocReader (org.apache.nifi.provenance.toc.TocReader)1