Search in sources :

Example 31 with RecordReader

use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

the class PersistentProvenanceRepository method getEvents.

/**
 * Collects provenance events starting at a given Event ID.
 *
 * <p>Walks the event files returned by {@code getPathsForId(firstRecordId)} in order and
 * gathers every event whose ID is at least {@code firstRecordId} and for which
 * {@code isAuthorized(event, user)} holds, stopping as soon as {@code maxRecords} events
 * have been gathered.</p>
 *
 * <p>A file that cannot be found, or that ends prematurely, is skipped silently. Any other
 * I/O failure on a file is logged and reported through the event reporter, after which
 * reading continues with the next file.</p>
 *
 * @param firstRecordId the lowest Event ID that may be returned
 * @param maxRecords the maximum number of events to return; also used as the initial capacity of the result list
 * @param user the user whose authorization is checked for each candidate event
 * @return the matching events in the order they were read; empty if no event files were found
 * @throws IOException declared by the overridden method
 */
@Override
public List<ProvenanceEventRecord> getEvents(final long firstRecordId, final int maxRecords, final NiFiUser user) throws IOException {
    final List<ProvenanceEventRecord> matched = new ArrayList<>(maxRecords);
    final List<Path> eventFiles = getPathsForId(firstRecordId);
    final boolean nothingToRead = eventFiles == null || eventFiles.isEmpty();
    if (nothingToRead) {
        return matched;
    }

    for (final Path eventFile : eventFiles) {
        try (RecordReader reader = RecordReaders.newRecordReader(eventFile.toFile(), getAllLogFiles(), maxAttributeChars)) {
            // Until the first event has been collected, use the table of contents (when there
            // is one) to jump straight to the block that should hold the first wanted event.
            if (matched.isEmpty()) {
                final TocReader toc = reader.getTocReader();
                final Integer startBlock = (toc == null) ? null : toc.getBlockIndexForEventId(firstRecordId);
                if (startBlock != null) {
                    reader.skipToBlock(startBlock);
                }
            }

            // The size check comes before the read so that no record is consumed once the
            // result is already full.
            while (matched.size() < maxRecords) {
                final StandardProvenanceEventRecord candidate = reader.nextRecord();
                if (candidate == null) {
                    break;
                }
                if (candidate.getEventId() < firstRecordId || !isAuthorized(candidate, user)) {
                    continue;
                }
                matched.add(candidate);
            }
        } catch (final EOFException | FileNotFoundException ignored) {
            // Deliberately ignored: assume the file aged off, or (for EOFException) that it holds
            // no data because the data was only cached by the operating system, the entire O/S
            // crashed, and always.sync was not turned on.
        } catch (final IOException ioe) {
            final String cause = ioe.toString();
            logger.error("Failed to read Provenance Event File {} due to {}", eventFile.toFile(), cause);
            logger.error("", ioe);
            eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to read Provenance Event File " + eventFile.toFile() + " due to " + cause);
        }

        if (matched.size() >= maxRecords) {
            break;
        }
    }

    if (logger.isDebugEnabled()) {
        logger.debug("Retrieving up to {} records starting at Event ID {}; returning {} events", maxRecords, firstRecordId, matched.size());
    }
    return matched;
}
Also used : Path(java.nio.file.Path) TocReader(org.apache.nifi.provenance.toc.TocReader) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) EOFException(java.io.EOFException)

Example 32 with RecordReader

use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

the class PersistentProvenanceRepository method purgeExpiredIndexes.

/**
 * Deletes the oldest index directory when every event it could contain has already been
 * removed from the repository, and refreshes {@code firstEventTimestamp}.
 *
 * <p>The decision compares the timestamp encoded in the name of the second index directory
 * (used as the latest timestamp the first index can hold) against the time of the first
 * event in the first sorted Provenance Event Log File.</p>
 *
 * <p>Nothing is deleted when fewer than two index directories exist, when there are no log
 * files, or when the first log file yields no event at all.</p>
 *
 * @throws IOException if a helper invoked outside the guarded read of the first log file fails
 */
private void purgeExpiredIndexes() throws IOException {
    // Now that we have potentially removed expired Provenance Event Log Files, we can look at
    // whether or not we can delete any of the indexes. An index can be deleted if all of the
    // data that is associated with that index has already been deleted. In order to test this,
    // we will get the timestamp of the earliest event and then compare that to the latest timestamp
    // that would be indexed by the earliest index. If the event occurred after the timestamp of
    // the latest index, then we can just delete the entire index all together.
    // find all of the index directories
    final List<File> indexDirs = getAllIndexDirectories();
    if (indexDirs.size() < 2) {
        this.firstEventTimestamp = determineFirstEventTimestamp();
        return;
    }
    // Indexes are named "index-XXX" where the XXX is the timestamp of the earliest event that
    // could be in the index. Once we have finished with one index, we move on to another index,
    // but we don't move on until we are finished with the previous index.
    // Therefore, an efficient way to determine the latest timestamp of one index is to look at the
    // timestamp of the next index (these could potentially overlap for one millisecond). This is
    // efficient because we can determine the earliest timestamp of an index simply by looking at
    // the name of the Index's directory.
    final long latestTimestampOfFirstIndex = getIndexTimestamp(indexDirs.get(1));
    // Get the timestamp of the first event in the first Provenance Event Log File and the ID of the last event
    // in the event file.
    final List<File> logFiles = getSortedLogFiles();
    if (logFiles.isEmpty()) {
        this.firstEventTimestamp = System.currentTimeMillis();
        return;
    }
    final File firstLogFile = logFiles.get(0);
    long earliestEventTime = System.currentTimeMillis();
    long maxEventId = -1L;
    try (final RecordReader reader = RecordReaders.newRecordReader(firstLogFile, null, Integer.MAX_VALUE)) {
        final StandardProvenanceEventRecord event = reader.nextRecord();
        if (event == null) {
            // nextRecord() yields null when the file has no (more) records. An empty file gives us
            // no timestamp to compare against, so leave the indexes and firstEventTimestamp untouched
            // rather than dereferencing null or deleting an index on a guess.
            logger.warn("Provenance Event Log File {} contains no events; skipping purge of expired indexes", firstLogFile);
            return;
        }
        earliestEventTime = event.getEventTime();
        maxEventId = reader.getMaxEventId();
    } catch (final IOException ioe) {
        // Best effort: fall through with the defaults assigned above.
        logger.warn("Unable to determine the maximum ID for Provenance Event Log File {}; values reported for the number of events in the Provenance Repository may be inaccurate.", firstLogFile);
        // Keep the cause visible in the log instead of dropping it.
        logger.warn("", ioe);
    }
    // check if we can delete the index safely.
    if (latestTimestampOfFirstIndex <= earliestEventTime) {
        // we can safely delete the first index because the latest event in the index is an event
        // that has already been expired from the repository.
        final File indexingDirectory = indexDirs.get(0);
        getIndexManager().removeIndex(indexingDirectory);
        indexConfig.removeIndexDirectory(indexingDirectory);
        deleteDirectory(indexingDirectory);
        // Only advance the minimum indexed ID when the max ID was actually read from the file.
        if (maxEventId > -1L) {
            indexConfig.setMinIdIndexed(maxEventId + 1L);
        }
    }
    this.firstEventTimestamp = earliestEventTime;
}
Also used : RecordReader(org.apache.nifi.provenance.serialization.RecordReader) IOException(java.io.IOException) File(java.io.File)

Example 33 with RecordReader

use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

the class UpdateMinimumEventId method execute.

/**
 * Records the maximum Event ID found in an expired journal file as the minimum indexed ID.
 *
 * <p>If the file cannot be read, a warning is logged and the index configuration is left
 * as it was; the failure is not propagated.</p>
 *
 * @param expiredFile the expired journal file to inspect
 * @return {@code expiredFile}, unchanged
 * @throws IOException declared by the overridden method; read failures on the file are caught here
 */
@Override
public File execute(final File expiredFile) throws IOException {
    try (final RecordReader journalReader = RecordReaders.newRecordReader(expiredFile, null, Integer.MAX_VALUE)) {
        final long newMinimumId = journalReader.getMaxEventId();
        indexConfig.setMinIdIndexed(newMinimumId);
        logger.info("Updated Minimum Event ID for Provenance Event Repository - Minimum Event ID now {}", newMinimumId);
    } catch (final IOException readFailure) {
        final String journalPath = expiredFile.getAbsolutePath();
        logger.warn("Failed to obtain max ID present in journal file {}", journalPath);
    }

    return expiredFile;
}
Also used : RecordReader(org.apache.nifi.provenance.serialization.RecordReader) IOException(java.io.IOException)

Example 34 with RecordReader

use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

the class WriteAheadStorePartition method getEvent.

/**
 * Looks up a single provenance event by its Event ID.
 *
 * @param id the Event ID to find
 * @return the event with exactly that ID, or an empty Optional when no file is known for the
 *         ID or the file's reader does not produce an event with that ID
 * @throws IOException if the record reader cannot be created or read
 */
@Override
public Optional<ProvenanceEventRecord> getEvent(final long id) throws IOException {
    final Optional<File> eventFile = getPathForEventId(id);
    if (!eventFile.isPresent()) {
        return Optional.empty();
    }

    try (final RecordReader reader = recordReaderFactory.newRecordReader(eventFile.get(), Collections.emptyList(), config.getMaxAttributeChars())) {
        // skipToEvent may hand back an event with a higher Event ID when the requested one is
        // not in this reader, so only an exact ID match is accepted; anything else is empty.
        return reader.skipToEvent(id).filter(found -> found.getEventId() == id);
    }
}
Also used : RecordReader(org.apache.nifi.provenance.serialization.RecordReader) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) File(java.io.File)

Example 35 with RecordReader

use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

the class DumpEventFile method main.

/**
 * Prints every event of a provenance event file to standard output.
 *
 * <p>Expects exactly one argument, the path of the file to dump. With any other argument
 * count the usage text is printed; if the file does not exist a message is printed. In both
 * cases the method returns without reading anything.</p>
 *
 * @param args command-line arguments; {@code args[0]} is the event file path
 * @throws IOException if the file cannot be opened or read
 */
public static void main(final String[] args) throws IOException {
    if (args.length != 1) {
        printUsage();
        return;
    }

    final File eventFile = new File(args[0]);
    if (!eventFile.exists()) {
        System.out.println("Cannot find file " + eventFile.getAbsolutePath());
        return;
    }

    try (final RecordReader reader = RecordReaders.newRecordReader(eventFile, Collections.emptyList(), 65535)) {
        int eventIndex = 0;
        for (StandardProvenanceEventRecord event = reader.nextRecord(); event != null; event = reader.nextRecord()) {
            // Read after the event has been consumed, so this reflects the reader's
            // position once the event was read.
            final long bytesConsumed = reader.getBytesConsumed();
            System.out.println(stringify(event, eventIndex, bytesConsumed));
            eventIndex++;
        }
    }
}
Also used : StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) File(java.io.File)

Aggregations

RecordReader (org.apache.nifi.provenance.serialization.RecordReader)37 File (java.io.File)30 Test (org.junit.Test)18 FileInputStream (java.io.FileInputStream)16 IOException (java.io.IOException)16 RecordWriter (org.apache.nifi.provenance.serialization.RecordWriter)16 TocReader (org.apache.nifi.provenance.toc.TocReader)16 StandardTocReader (org.apache.nifi.provenance.toc.StandardTocReader)15 StandardTocWriter (org.apache.nifi.provenance.toc.StandardTocWriter)14 ArrayList (java.util.ArrayList)13 HashMap (java.util.HashMap)12 TocWriter (org.apache.nifi.provenance.toc.TocWriter)12 InputStream (java.io.InputStream)7 EOFException (java.io.EOFException)6 AtomicLong (java.util.concurrent.atomic.AtomicLong)6 Ignore (org.junit.Ignore)6 ByteArrayOutputStream (java.io.ByteArrayOutputStream)5 DataOutputStream (java.io.DataOutputStream)5 Path (java.nio.file.Path)5 Callable (java.util.concurrent.Callable)5