Example 11 with RecordReader

Use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

The class PersistentProvenanceRepository, method recover.

private void recover() throws IOException {
    long maxId = -1L;
    long maxIndexedId = -1L;
    long minIndexedId = Long.MAX_VALUE;
    final List<File> filesToRecover = new ArrayList<>();
    for (final File file : configuration.getStorageDirectories().values()) {
        final File[] matchingFiles = file.listFiles(new FileFilter() {

            @Override
            public boolean accept(final File pathname) {
                final String filename = pathname.getName();
                if (!filename.contains(FILE_EXTENSION) || filename.endsWith(TEMP_FILE_SUFFIX)) {
                    return false;
                }
                final String baseFilename = filename.substring(0, filename.indexOf("."));
                return NUMBER_PATTERN.matcher(baseFilename).matches();
            }
        });
        for (final File matchingFile : matchingFiles) {
            filesToRecover.add(matchingFile);
        }
    }
    final SortedMap<Long, Path> sortedPathMap = new TreeMap<>(new Comparator<Long>() {

        @Override
        public int compare(final Long o1, final Long o2) {
            return Long.compare(o1, o2);
        }
    });
    File maxIdFile = null;
    for (final File file : filesToRecover) {
        final String filename = file.getName();
        final String baseName = filename.substring(0, filename.indexOf("."));
        final long firstId = Long.parseLong(baseName);
        sortedPathMap.put(firstId, file.toPath());
        if (firstId > maxId) {
            maxId = firstId;
            maxIdFile = file;
        }
        if (firstId > maxIndexedId) {
            maxIndexedId = firstId - 1;
        }
        if (firstId < minIndexedId) {
            minIndexedId = firstId;
        }
    }
    if (maxIdFile != null) {
        // Determine the max ID in the last file.
        try (final RecordReader reader = RecordReaders.newRecordReader(maxIdFile, getAllLogFiles(), maxAttributeChars)) {
            final long eventId = reader.getMaxEventId();
            if (eventId > maxId) {
                maxId = eventId;
            }
            // update the max indexed id
            if (eventId > maxIndexedId) {
                maxIndexedId = eventId;
            }
        } catch (final IOException ioe) {
            logger.error("Failed to read Provenance Event File {} due to {}", maxIdFile, ioe);
            logger.error("", ioe);
        }
    }
    if (maxIndexedId > -1L) {
        // If we have indexed anything then set the min/max IDs indexed.
        indexConfig.setMaxIdIndexed(maxIndexedId);
    }
    if (minIndexedId < Long.MAX_VALUE) {
        indexConfig.setMinIdIndexed(minIndexedId);
    }
    idGenerator.set(maxId + 1);
    try {
        final Set<File> recoveredJournals = recoverJournalFiles();
        filesToRecover.addAll(recoveredJournals);
        // Find the file that has the greatest ID
        File greatestMinIdFile = null;
        long greatestMinId = 0L;
        for (final File recoveredJournal : recoveredJournals) {
            // if the file was removed because the journals were empty, don't count it
            if (!recoveredJournal.exists()) {
                continue;
            }
            final String basename = LuceneUtil.substringBefore(recoveredJournal.getName(), ".");
            try {
                final long minId = Long.parseLong(basename);
                sortedPathMap.put(minId, recoveredJournal.toPath());
                if (greatestMinIdFile == null || minId > greatestMinId) {
                    greatestMinId = minId;
                    greatestMinIdFile = recoveredJournal;
                }
            } catch (final NumberFormatException nfe) {
            // not a file we care about...
            }
        }
        // Read the records in the last file to find its max id
        if (greatestMinIdFile != null) {
            try (final RecordReader recordReader = RecordReaders.newRecordReader(greatestMinIdFile, Collections.<Path>emptyList(), maxAttributeChars)) {
                maxId = recordReader.getMaxEventId();
            }
        }
        // set the ID Generator 1 greater than the max id
        idGenerator.set(maxId + 1);
    } catch (final IOException ioe) {
        logger.error("Failed to recover Journal Files due to {}", ioe.toString());
        logger.error("", ioe);
    }
    idToPathMap.set(Collections.unmodifiableSortedMap(sortedPathMap));
    logger.trace("In recovery, path map: {}", sortedPathMap);
    final long recordsRecovered;
    if (minIndexedId < Long.MAX_VALUE) {
        recordsRecovered = idGenerator.get() - minIndexedId;
    } else {
        recordsRecovered = idGenerator.get();
    }
    logger.info("Recovered {} records", recordsRecovered);
    recoveryFinished.set(true);
}
Also used : Path(java.nio.file.Path) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) ArrayList(java.util.ArrayList) IOException(java.io.IOException) TreeMap(java.util.TreeMap) AtomicLong(java.util.concurrent.atomic.AtomicLong) TimestampedLong(org.apache.nifi.util.timebuffer.TimestampedLong) FileFilter(java.io.FileFilter) File(java.io.File)
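
Each of these examples shares the same basic idiom: obtain a RecordReader via RecordReaders.newRecordReader(file, provenanceLogFiles, maxAttributeChars), read records from it, and close it (usually with try-with-resources). Below is a minimal sketch of that idiom; it is not part of the NiFi source. The file path is hypothetical, and because end-of-file can surface either as a null record or as an EOFException depending on how the file ends, the sketch handles both.

private void printAllEvents() throws IOException {
    // Hypothetical event file; real files are named for the ID of their first event.
    final File eventFile = new File("/repo/provenance/1000.prov.gz");
    // An empty collection means no sibling log files are consulted; 65536 caps attribute length.
    try (final RecordReader reader = RecordReaders.newRecordReader(eventFile, Collections.<Path>emptyList(), 65536)) {
        StandardProvenanceEventRecord record;
        while ((record = reader.nextRecord()) != null) {
            System.out.println(record.getEventId() + " " + record.getEventType());
        }
    } catch (final EOFException eof) {
        // a truncated final record surfaces as EOF; treat it as end-of-file
    }
}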

Example 12 with RecordReader

Use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

The class PersistentProvenanceRepository, method queryLucene.

/**
 * This is for testing only; it is not actually used other than for debugging.
 *
 * @param luceneQuery the lucene query to execute
 * @return an Iterator of ProvenanceEventRecord that match the query
 * @throws IOException if unable to perform the query
 */
Iterator<ProvenanceEventRecord> queryLucene(final org.apache.lucene.search.Query luceneQuery) throws IOException {
    final List<File> indexFiles = indexConfig.getIndexDirectories();
    final AtomicLong hits = new AtomicLong(0L);
    final List<Future<List<Document>>> futures = new ArrayList<>();
    for (final File indexDirectory : indexFiles) {
        final Callable<List<Document>> callable = new Callable<List<Document>>() {

            @Override
            public List<Document> call() {
                final List<Document> localScoreDocs = new ArrayList<>();
                try (final DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(indexDirectory))) {
                    final IndexSearcher searcher = new IndexSearcher(directoryReader);
                    final TopDocs topDocs = searcher.search(luceneQuery, 10000000);
                    logger.info("For {}, Top Docs has {} hits; reading Lucene results", indexDirectory, topDocs.scoreDocs.length);
                    if (topDocs.totalHits > 0) {
                        for (final ScoreDoc scoreDoc : topDocs.scoreDocs) {
                            final int docId = scoreDoc.doc;
                            final Document d = directoryReader.document(docId);
                            localScoreDocs.add(d);
                        }
                    }
                    hits.addAndGet(localScoreDocs.size());
                } catch (final IndexNotFoundException e) {
                    // no index exists in this directory yet; nothing to search
                } catch (final IOException ioe) {
                    throw new RuntimeException(ioe);
                }
                return localScoreDocs;
            }
        };
        final Future<List<Document>> future = queryExecService.submit(callable);
        futures.add(future);
    }
    logger.info("Merging results of Lucene query ({} hits)", hits.get());
    List<Document> scoreDocs = null;
    int idx = 0;
    for (final Future<List<Document>> future : futures) {
        try {
            final List<Document> docs = future.get();
            if (idx++ == 0) {
                scoreDocs = docs;
            } else {
                scoreDocs.addAll(docs);
                docs.clear();
            }
        } catch (final ExecutionException | InterruptedException ee) {
            throw new RuntimeException(ee);
        }
    }
    logger.info("Finished querying Lucene; there are {} docs; sorting for retrieval", scoreDocs.size());
    LuceneUtil.sortDocsForRetrieval(scoreDocs);
    logger.info("Finished sorting for retrieval. Returning Iterator.");
    final Iterator<Document> docItr = scoreDocs.iterator();
    final Collection<Path> allLogFiles = getAllLogFiles();
    return new Iterator<ProvenanceEventRecord>() {

        int count = 0;
        RecordReader reader = null;
        String lastStorageFilename = null;
        long lastByteOffset = 0L;

        @Override
        public boolean hasNext() {
            return docItr.hasNext();
        }

        @Override
        public ProvenanceEventRecord next() {
            if (count++ > 0) {
                // remove last document so that we don't hold everything in memory.
                docItr.remove();
            }
            final Document doc = docItr.next();
            final String storageFilename = doc.getField(FieldNames.STORAGE_FILENAME).stringValue();
            final long byteOffset = doc.getField(FieldNames.STORAGE_FILE_OFFSET).numericValue().longValue();
            try {
                if (reader != null && storageFilename.equals(lastStorageFilename) && byteOffset > lastByteOffset) {
                    // Still the same file and the offset is downstream.
                    try {
                        reader.skipTo(byteOffset);
                        final StandardProvenanceEventRecord record = reader.nextRecord();
                        return record;
                    } catch (final IOException e) {
                        if (hasNext()) {
                            return next();
                        } else {
                            return null;
                        }
                    }
                } else {
                    if (reader != null) {
                        try {
                            reader.close();
                        } catch (final IOException ioe) {
                            // ignore; this reader is being replaced with a new one
                        }
                    }
                    final List<File> potentialFiles = LuceneUtil.getProvenanceLogFiles(storageFilename, allLogFiles);
                    if (potentialFiles.isEmpty()) {
                        if (hasNext()) {
                            return next();
                        } else {
                            return null;
                        }
                    }
                    if (potentialFiles.size() > 1) {
                        if (hasNext()) {
                            return next();
                        } else {
                            return null;
                        }
                    }
                    for (final File file : potentialFiles) {
                        try {
                            reader = RecordReaders.newRecordReader(file, allLogFiles, maxAttributeChars);
                        } catch (final IOException ioe) {
                            continue;
                        }
                        try {
                            reader.skip(byteOffset);
                            final StandardProvenanceEventRecord record = reader.nextRecord();
                            return record;
                        } catch (final IOException e) {
                            continue;
                        }
                    }
                }
            } finally {
                lastStorageFilename = storageFilename;
                lastByteOffset = byteOffset;
            }
            return null;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    };
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) Callable(java.util.concurrent.Callable) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) Iterator(java.util.Iterator) List(java.util.List) ExecutionException(java.util.concurrent.ExecutionException) Path(java.nio.file.Path) DirectoryReader(org.apache.lucene.index.DirectoryReader) IOException(java.io.IOException) AtomicLong(java.util.concurrent.atomic.AtomicLong) Future(java.util.concurrent.Future) IndexNotFoundException(org.apache.lucene.index.IndexNotFoundException) File(java.io.File)
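
Because queryLucene(...) accepts any org.apache.lucene.search.Query, a debugging session can drive it directly. The sketch below is hedged: the repository variable and the indexed field name "eventType" are assumptions for illustration, not taken from the source above.

// Hedged sketch of driving the test-only queryLucene(...) method.
final Query luceneQuery = new TermQuery(new Term("eventType", "RECEIVE")); // field name assumed
final Iterator<ProvenanceEventRecord> events = repository.queryLucene(luceneQuery);
while (events.hasNext()) {
    // next() can return null when the backing event file is missing or unreadable
    final ProvenanceEventRecord event = events.next();
    if (event != null) {
        System.out.println(event.getEventId() + " @ " + event.getEventTime());
    }
}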

Example 13 with RecordReader

Use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

The class PersistentProvenanceRepository, method mergeJournals.

/**
 * <p>
 * Merges all of the given Journal Files into a single, merged Provenance
 * Event Log File. As these records are merged, they will be compressed, if
 * the repository is configured to compress records, and will be indexed.
 * </p>
 *
 * <p>
 * If the repository is configured to compress the data, the file written to
 * may not be the same as the <code>suggestedMergeFile</code>, as a filename
 * extension of '.gz' may be appended. If the journals are successfully
 * merged, the file that they were merged into will be returned. If unable
 * to merge the records (for instance, because the repository has been
 * closed or because the list of journal files was empty), this method will
 * return <code>null</code>.
 * </p>
 *
 * @param journalFiles the journal files to merge
 * @param suggestedMergeFile the file to write the merged records to
 * @param eventReporter the event reporter to report any warnings or errors
 * to; may be null.
 *
 * @return the file that the given journals were merged into, or
 * <code>null</code> if no records were merged.
 *
 * @throws IOException if a problem occurs writing to the merged file,
 * reading from a journal, or updating the Lucene Index.
 */
File mergeJournals(final List<File> journalFiles, final File suggestedMergeFile, final EventReporter eventReporter) throws IOException {
    logger.debug("Merging {} to {}", journalFiles, suggestedMergeFile);
    if (this.closed.get()) {
        logger.info("Provenance Repository has been closed; will not merge journal files to {}", suggestedMergeFile);
        return null;
    }
    if (journalFiles.isEmpty()) {
        logger.debug("Couldn't merge journals: Journal Files is empty; won't merge journals");
        return null;
    }
    Collections.sort(journalFiles, new Comparator<File>() {

        @Override
        public int compare(final File o1, final File o2) {
            final String suffix1 = LuceneUtil.substringAfterLast(o1.getName(), ".");
            final String suffix2 = LuceneUtil.substringAfterLast(o2.getName(), ".");
            try {
                final int journalIndex1 = Integer.parseInt(suffix1);
                final int journalIndex2 = Integer.parseInt(suffix2);
                return Integer.compare(journalIndex1, journalIndex2);
            } catch (final NumberFormatException nfe) {
                return o1.getName().compareTo(o2.getName());
            }
        }
    });
    // Search for any missing files. At this point they should have been written to disk; otherwise, we cannot continue.
    // Missing files are most likely due to incomplete cleanup of files post merge.
    final List<File> availableFiles = filterUnavailableFiles(journalFiles);
    final int numAvailableFiles = availableFiles.size();
    // check if we have all of the "partial" files for the journal.
    if (numAvailableFiles > 0) {
        if (suggestedMergeFile.exists()) {
            // we have all "partial" files and there is already a merged file. Delete the data from the index
            // because the merge file may not be fully merged. We will re-merge.
            logger.warn("Merged Journal File {} already exists; however, all partial journal files also exist " + "so assuming that the merge did not finish. Repeating procedure in order to ensure consistency.");
            final DeleteIndexAction deleteAction = new DeleteIndexAction(this, indexConfig, getIndexManager());
            try {
                deleteAction.execute(suggestedMergeFile);
            } catch (final Exception e) {
                logger.warn("Failed to delete records from Journal File {} from the index; this could potentially result in duplicates. Failure was due to {}", suggestedMergeFile, e.toString());
                if (logger.isDebugEnabled()) {
                    logger.warn("", e);
                }
            }
            // Before re-merging, we must delete the partially written merged file and the TOC file.
            // Otherwise, we could get the wrong copy and have issues retrieving events.
            if (!suggestedMergeFile.delete()) {
                logger.error("Failed to delete partially written Provenance Journal File {}. This may result in events from this journal " + "file not being able to be displayed. This file should be deleted manually.", suggestedMergeFile);
            }
            final File tocFile = TocUtil.getTocFile(suggestedMergeFile);
            if (tocFile.exists() && !tocFile.delete()) {
                logger.error("Failed to delete .toc file {}; this may result in not being able to read the Provenance Events from the {} Journal File. " + "This can be corrected by manually deleting the {} file", tocFile, suggestedMergeFile, tocFile);
            }
        }
    } else {
        logger.warn("Cannot merge journal files {} because they do not exist on disk", journalFiles);
        return null;
    }
    final long startNanos = System.nanoTime();
    // Map each journal to a RecordReader
    final List<RecordReader> readers = new ArrayList<>();
    int records = 0;
    final boolean isCompress = configuration.isCompressOnRollover();
    final File writerFile = isCompress ? new File(suggestedMergeFile.getParentFile(), suggestedMergeFile.getName() + ".gz") : suggestedMergeFile;
    try {
        for (final File journalFile : availableFiles) {
            try {
                // Use MAX_VALUE for number of chars because we don't want to truncate the value as we write it
                // out. This allows us to later decide that we want more characters and still be able to retrieve
                // the entire event.
                readers.add(RecordReaders.newRecordReader(journalFile, null, Integer.MAX_VALUE));
            } catch (final EOFException eof) {
            // there's nothing here. Skip over it.
            } catch (final IOException ioe) {
                logger.warn("Unable to merge {} with other Journal Files due to {}", journalFile, ioe.toString());
                if (logger.isDebugEnabled()) {
                    logger.warn("", ioe);
                }
                if (eventReporter != null) {
                    eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to merge Journal Files due to " + ioe.toString());
                }
            }
        }
        // Create a Map so that the key is the next record available from a reader and the value is the Reader from which
        // the record came. This sorted map is then used so that we are able to always get the first entry, which is the next
        // lowest record id
        final SortedMap<StandardProvenanceEventRecord, RecordReader> recordToReaderMap = new TreeMap<>(new Comparator<StandardProvenanceEventRecord>() {

            @Override
            public int compare(final StandardProvenanceEventRecord o1, final StandardProvenanceEventRecord o2) {
                return Long.compare(o1.getEventId(), o2.getEventId());
            }
        });
        long minEventId = 0L;
        long earliestTimestamp = System.currentTimeMillis();
        for (final RecordReader reader : readers) {
            StandardProvenanceEventRecord record = null;
            try {
                record = reader.nextRecord();
            } catch (final EOFException eof) {
            // record will be null and reader can no longer be used
            } catch (final Exception e) {
                logger.warn("Failed to generate Provenance Event Record from Journal due to " + e + "; it's " + "possible that the record wasn't completely written to the file. This journal will be " + "skipped.");
                if (logger.isDebugEnabled()) {
                    logger.warn("", e);
                }
                if (eventReporter != null) {
                    eventReporter.reportEvent(Severity.WARNING, EVENT_CATEGORY, "Failed to read Provenance Event " + "Record from Journal due to " + e + "; it's possible that the record wasn't " + "completely written to the file. This journal will be skipped.");
                }
            }
            if (record == null) {
                continue;
            }
            if (record.getEventTime() < earliestTimestamp) {
                earliestTimestamp = record.getEventTime();
            }
            if (record.getEventId() < minEventId) {
                minEventId = record.getEventId();
            }
            recordToReaderMap.put(record, reader);
        }
        // We want to keep track of the last 1000 events in the files so that we can add them to 'ringBuffer'.
        // However, we don't want to add them directly to ringBuffer, because once they are added to ringBuffer, they are
        // available in query results. As a result, we can have the issue where we've not finished indexing the file
        // but we try to create the lineage for events in that file. In order to avoid this, we will add the records
        // to a temporary RingBuffer and after we finish merging the records will then copy the data to the
        // ringBuffer provided as a method argument.
        final RingBuffer<ProvenanceEventRecord> latestRecords = new RingBuffer<>(1000);
        // Loop over the map, always writing out the record with the lowest event ID and replacing it
        // with the next entry from the journal file from which the previous record was written.
        try (final RecordWriter writer = RecordWriters.newSchemaRecordWriter(writerFile, idGenerator, configuration.isCompressOnRollover(), true)) {
            writer.writeHeader(minEventId);
            final IndexingAction indexingAction = createIndexingAction();
            final File indexingDirectory = indexConfig.getWritableIndexDirectory(writerFile, earliestTimestamp);
            long maxId = 0L;
            final BlockingQueue<Tuple<StandardProvenanceEventRecord, Integer>> eventQueue = new LinkedBlockingQueue<>(100);
            final AtomicBoolean finishedAdding = new AtomicBoolean(false);
            final List<Future<?>> futures = new ArrayList<>();
            final EventIndexWriter indexWriter = getIndexManager().borrowIndexWriter(indexingDirectory);
            try {
                final ExecutorService exec = Executors.newFixedThreadPool(configuration.getIndexThreadPoolSize(), new ThreadFactory() {

                    @Override
                    public Thread newThread(final Runnable r) {
                        final Thread t = Executors.defaultThreadFactory().newThread(r);
                        t.setName("Index Provenance Events");
                        return t;
                    }
                });
                final AtomicInteger indexingFailureCount = new AtomicInteger(0);
                try {
                    for (int i = 0; i < configuration.getIndexThreadPoolSize(); i++) {
                        final Callable<Object> callable = new Callable<Object>() {

                            @Override
                            public Object call() throws IOException {
                                while (!eventQueue.isEmpty() || !finishedAdding.get()) {
                                    try {
                                        final Tuple<StandardProvenanceEventRecord, Integer> tuple;
                                        try {
                                            tuple = eventQueue.poll(10, TimeUnit.MILLISECONDS);
                                        } catch (final InterruptedException ie) {
                                            Thread.currentThread().interrupt();
                                            continue;
                                        }
                                        if (tuple == null) {
                                            continue;
                                        }
                                        indexingAction.index(tuple.getKey(), indexWriter.getIndexWriter(), tuple.getValue());
                                    } catch (final Throwable t) {
                                        logger.error("Failed to index Provenance Event for " + writerFile + " to " + indexingDirectory, t);
                                        if (indexingFailureCount.incrementAndGet() >= MAX_INDEXING_FAILURE_COUNT) {
                                            return null;
                                        }
                                    }
                                }
                                return null;
                            }
                        };
                        final Future<?> future = exec.submit(callable);
                        futures.add(future);
                    }
                    boolean indexEvents = true;
                    while (!recordToReaderMap.isEmpty()) {
                        final StandardProvenanceEventRecord record = recordToReaderMap.firstKey();
                        final RecordReader reader = recordToReaderMap.get(record);
                        writer.writeRecord(record);
                        final int blockIndex = writer.getTocWriter().getCurrentBlockIndex();
                        boolean accepted = false;
                        while (!accepted && indexEvents) {
                            try {
                                accepted = eventQueue.offer(new Tuple<>(record, blockIndex), 10, TimeUnit.MILLISECONDS);
                            } catch (final InterruptedException ie) {
                                Thread.currentThread().interrupt();
                            }
                            // If the offer timed out, indexing may have stalled due to repeated failures.
                            // So, if the queue is filled, we will check if this is the case.
                            if (!accepted && indexingFailureCount.get() >= MAX_INDEXING_FAILURE_COUNT) {
                                // don't add anything else to the queue.
                                indexEvents = false;
                                eventQueue.clear();
                                final String warning = String.format("Indexing Provenance Events for %s has failed %s times. This exceeds the maximum threshold of %s failures, " + "so no more Provenance Events will be indexed for this Provenance file.", writerFile, indexingFailureCount.get(), MAX_INDEXING_FAILURE_COUNT);
                                logger.warn(warning);
                                if (eventReporter != null) {
                                    eventReporter.reportEvent(Severity.WARNING, EVENT_CATEGORY, warning);
                                }
                            }
                        }
                        maxId = record.getEventId();
                        latestRecords.add(truncateAttributes(record));
                        records++;
                        // Remove this entry from the map
                        recordToReaderMap.remove(record);
                        // Get the next entry from this reader and add it to the map
                        StandardProvenanceEventRecord nextRecord = null;
                        try {
                            nextRecord = reader.nextRecord();
                        } catch (final EOFException eof) {
                        // record will be null and reader can no longer be used
                        } catch (final Exception e) {
                            logger.warn("Failed to generate Provenance Event Record from Journal due to " + e + "; it's possible that the record wasn't completely written to the file. " + "The remainder of this journal will be skipped.");
                            if (logger.isDebugEnabled()) {
                                logger.warn("", e);
                            }
                            if (eventReporter != null) {
                                eventReporter.reportEvent(Severity.WARNING, EVENT_CATEGORY, "Failed to read " + "Provenance Event Record from Journal due to " + e + "; it's possible " + "that the record wasn't completely written to the file. The remainder " + "of this journal will be skipped.");
                            }
                        }
                        if (nextRecord != null) {
                            recordToReaderMap.put(nextRecord, reader);
                        }
                    }
                } finally {
                    finishedAdding.set(true);
                    exec.shutdown();
                }
                for (final Future<?> future : futures) {
                    try {
                        future.get();
                    } catch (final ExecutionException ee) {
                        final Throwable t = ee.getCause();
                        if (t instanceof RuntimeException) {
                            throw (RuntimeException) t;
                        }
                        throw new RuntimeException(t);
                    } catch (final InterruptedException e) {
                        Thread.currentThread().interrupt();
                        throw new RuntimeException("Thread interrupted");
                    }
                }
            } finally {
                getIndexManager().returnIndexWriter(indexWriter);
            }
            indexConfig.setMaxIdIndexed(maxId);
        }
        // Each record should now be available in the repository. We can copy the values from latestRecords to ringBuffer.
        final RingBuffer<ProvenanceEventRecord> latestRecordBuffer = this.latestRecords;
        latestRecords.forEach(new ForEachEvaluator<ProvenanceEventRecord>() {

            @Override
            public boolean evaluate(final ProvenanceEventRecord event) {
                latestRecordBuffer.add(event);
                return true;
            }
        });
    } finally {
        for (final RecordReader reader : readers) {
            try {
                reader.close();
            } catch (final IOException ioe) {
                // ignore; failing to close a reader should not fail the merge
            }
        }
    }
    // Success. Remove all of the journal files, as they're no longer needed, now that they've been merged.
    for (final File journalFile : availableFiles) {
        if (!journalFile.delete() && journalFile.exists()) {
            logger.warn("Failed to remove temporary journal file {}; this file should be cleaned up manually", journalFile.getAbsolutePath());
            if (eventReporter != null) {
                eventReporter.reportEvent(Severity.WARNING, EVENT_CATEGORY, "Failed to remove temporary journal file " + journalFile.getAbsolutePath() + "; this file should be cleaned up manually");
            }
        }
        final File tocFile = TocUtil.getTocFile(journalFile);
        if (!tocFile.delete() && tocFile.exists()) {
            logger.warn("Failed to remove temporary journal TOC file {}; this file should be cleaned up manually", tocFile.getAbsolutePath());
            if (eventReporter != null) {
                eventReporter.reportEvent(Severity.WARNING, EVENT_CATEGORY, "Failed to remove temporary journal TOC file " + tocFile.getAbsolutePath() + "; this file should be cleaned up manually");
            }
        }
    }
    if (records == 0) {
        writerFile.delete();
        logger.debug("Couldn't merge journals: No Records to merge");
        return null;
    } else {
        final long nanos = System.nanoTime() - startNanos;
        final long millis = TimeUnit.MILLISECONDS.convert(nanos, TimeUnit.NANOSECONDS);
        logger.info("Successfully merged {} journal files ({} records) into single Provenance Log File {} in {} milliseconds", numAvailableFiles, records, suggestedMergeFile, millis);
    }
    return writerFile;
}
Also used : NamedThreadFactory(org.apache.nifi.provenance.util.NamedThreadFactory) ThreadFactory(java.util.concurrent.ThreadFactory) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) ArrayList(java.util.ArrayList) RingBuffer(org.apache.nifi.util.RingBuffer) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) Callable(java.util.concurrent.Callable) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) DeleteIndexAction(org.apache.nifi.provenance.lucene.DeleteIndexAction) EOFException(java.io.EOFException) ExecutionException(java.util.concurrent.ExecutionException) IndexingAction(org.apache.nifi.provenance.lucene.IndexingAction) IOException(java.io.IOException) TreeMap(java.util.TreeMap) IndexNotFoundException(org.apache.lucene.index.IndexNotFoundException) ResourceNotFoundException(org.apache.nifi.web.ResourceNotFoundException) AccessDeniedException(org.apache.nifi.authorization.AccessDeniedException) FileNotFoundException(java.io.FileNotFoundException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) EventIndexWriter(org.apache.nifi.provenance.index.EventIndexWriter) File(java.io.File) Tuple(org.apache.nifi.util.Tuple)
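
Stripped of compression, indexing, and error handling, the core of mergeJournals is a k-way merge: each reader contributes its next record to a map sorted by event ID, the smallest entry is written out, and the reader that supplied it is polled again. A minimal sketch of just that pattern, reusing the RecordReader and RecordWriter signatures seen above (EOF recovery omitted for brevity):

static void mergeInEventIdOrder(final List<RecordReader> readers, final RecordWriter writer) throws IOException {
    final SortedMap<StandardProvenanceEventRecord, RecordReader> nextRecords =
        new TreeMap<>(Comparator.comparingLong(StandardProvenanceEventRecord::getEventId));
    // Seed the map with the first record from each journal.
    for (final RecordReader reader : readers) {
        final StandardProvenanceEventRecord first = reader.nextRecord();
        if (first != null) {
            nextRecords.put(first, reader);
        }
    }
    // Repeatedly emit the globally smallest event ID, then refill from the same reader.
    while (!nextRecords.isEmpty()) {
        final StandardProvenanceEventRecord smallest = nextRecords.firstKey();
        final RecordReader source = nextRecords.remove(smallest);
        writer.writeRecord(smallest);
        final StandardProvenanceEventRecord following = source.nextRecord();
        if (following != null) {
            nextRecords.put(following, source);
        }
    }
}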

Example 14 with RecordReader

Use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

The class DeleteIndexAction, method execute.

@Override
public File execute(final File expiredFile) throws IOException {
    // Determine the max event ID that we are deleting. Note that numDeleted is never
    // incremented below, so the summary log at the end always reports 0 documents removed.
    final long numDeleted = 0;
    long maxEventId = -1L;
    try (final RecordReader reader = RecordReaders.newRecordReader(expiredFile, repository.getAllLogFiles(), Integer.MAX_VALUE)) {
        maxEventId = reader.getMaxEventId();
    } catch (final IOException ioe) {
        logger.warn("Failed to obtain max ID present in journal file {}", expiredFile.getAbsolutePath());
    }
    // remove the records from the index
    final List<File> indexDirs = indexConfiguration.getIndexDirectories(expiredFile);
    for (final File indexingDirectory : indexDirs) {
        final Term term = new Term(FieldNames.STORAGE_FILENAME, LuceneUtil.substringBefore(expiredFile.getName(), "."));
        boolean deleteDir = false;
        final EventIndexWriter writer = indexManager.borrowIndexWriter(indexingDirectory);
        try {
            final IndexWriter indexWriter = writer.getIndexWriter();
            indexWriter.deleteDocuments(term);
            indexWriter.commit();
            final int docsLeft = indexWriter.numDocs();
            deleteDir = docsLeft <= 0;
            logger.debug("After expiring {}, there are {} docs left for index {}", expiredFile, docsLeft, indexingDirectory);
        } finally {
            indexManager.returnIndexWriter(writer);
        }
        // if we've confirmed that all documents have been removed, delete the index directory
        if (deleteDir) {
            indexManager.removeIndex(indexingDirectory);
            indexConfiguration.removeIndexDirectory(indexingDirectory);
            deleteDirectory(indexingDirectory);
            logger.info("Removed empty index directory {}", indexingDirectory);
        }
    }
    // Update the minimum index to 1 more than the max Event ID in this file.
    if (maxEventId > -1L) {
        indexConfiguration.setMinIdIndexed(maxEventId + 1L);
    }
    logger.info("Deleted Indices for Expired Provenance File {} from {} index files; {} documents removed", expiredFile, indexDirs.size(), numDeleted);
    return expiredFile;
}
Also used : IndexWriter(org.apache.lucene.index.IndexWriter) EventIndexWriter(org.apache.nifi.provenance.index.EventIndexWriter) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) IOException(java.io.IOException) Term(org.apache.lucene.index.Term) File(java.io.File)
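
A hedged sketch of invoking this action, mirroring the constructor arguments seen in Example 13 (new DeleteIndexAction(this, indexConfig, getIndexManager())); the variables and the file path here are assumptions for illustration:

// Hedged usage sketch for DeleteIndexAction.
final DeleteIndexAction deleteAction = new DeleteIndexAction(repository, indexConfiguration, indexManager);
final File expiredFile = new File("/repo/provenance/1000.prov.gz"); // hypothetical expired event file
// Removes the file's documents from every index directory that covers it,
// then advances the minimum indexed event ID past this file.
deleteAction.execute(expiredFile);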

Example 15 with RecordReader

Use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.

The class DocsReader, method read.

public Set<ProvenanceEventRecord> read(final List<Document> docs, final EventAuthorizer authorizer, final Collection<Path> allProvenanceLogFiles, final AtomicInteger retrievalCount, final int maxResults, final int maxAttributeChars) throws IOException {
    if (retrievalCount.get() >= maxResults) {
        return Collections.emptySet();
    }
    final long start = System.nanoTime();
    final Set<ProvenanceEventRecord> matchingRecords = new LinkedHashSet<>();
    final Map<String, List<Document>> byStorageNameDocGroups = LuceneUtil.groupDocsByStorageFileName(docs);
    int eventsReadThisFile = 0;
    // note: logFileCount is never incremented below, so the final debug log always reports 0 log files
    int logFileCount = 0;
    for (String storageFileName : byStorageNameDocGroups.keySet()) {
        final File provenanceEventFile = LuceneUtil.getProvenanceLogFile(storageFileName, allProvenanceLogFiles);
        if (provenanceEventFile == null) {
            logger.warn("Could not find Provenance Log File with " + "basename {} in the Provenance Repository; assuming " + "file has expired and continuing without it", storageFileName);
            continue;
        }
        try (final RecordReader reader = RecordReaders.newRecordReader(provenanceEventFile, allProvenanceLogFiles, maxAttributeChars)) {
            final Iterator<Document> docIter = byStorageNameDocGroups.get(storageFileName).iterator();
            while (docIter.hasNext() && retrievalCount.getAndIncrement() < maxResults) {
                final ProvenanceEventRecord event = getRecord(docIter.next(), reader);
                if (event != null && authorizer.isAuthorized(event)) {
                    matchingRecords.add(event);
                    eventsReadThisFile++;
                }
            }
        } catch (final Exception e) {
            logger.warn("Failed to read Provenance Events. The event file '" + provenanceEventFile.getAbsolutePath() + "' may be missing or corrupt.", e);
        }
    }
    logger.debug("Read {} records from previous file", eventsReadThisFile);
    final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
    logger.debug("Took {} ms to read {} events from {} prov log files", millis, matchingRecords.size(), logFileCount);
    return matchingRecords;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) Document(org.apache.lucene.document.Document) IOException(java.io.IOException) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) ArrayList(java.util.ArrayList) List(java.util.List) File(java.io.File)
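
A hedged sketch of calling DocsReader.read(...), matching the parameter order above. The docs list and authorizer are assumed to come from the surrounding query machinery, and the no-arg DocsReader constructor is an assumption:

// Hedged usage sketch for DocsReader.read(...).
final DocsReader docsReader = new DocsReader(); // constructor assumed
final Set<ProvenanceEventRecord> events = docsReader.read(
        luceneDocs,                   // List<Document> returned by a Lucene search
        authorizer,                   // EventAuthorizer deciding per-event visibility
        repository.getAllLogFiles(),  // Collection<Path> of candidate event files
        new AtomicInteger(0),         // running retrieval count shared across batches
        1000,                         // maxResults
        65536);                       // maxAttributeChars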

Aggregations

RecordReader (org.apache.nifi.provenance.serialization.RecordReader): 37
File (java.io.File): 30
Test (org.junit.Test): 18
FileInputStream (java.io.FileInputStream): 16
IOException (java.io.IOException): 16
RecordWriter (org.apache.nifi.provenance.serialization.RecordWriter): 16
TocReader (org.apache.nifi.provenance.toc.TocReader): 16
StandardTocReader (org.apache.nifi.provenance.toc.StandardTocReader): 15
StandardTocWriter (org.apache.nifi.provenance.toc.StandardTocWriter): 14
ArrayList (java.util.ArrayList): 13
HashMap (java.util.HashMap): 12
TocWriter (org.apache.nifi.provenance.toc.TocWriter): 12
InputStream (java.io.InputStream): 7
EOFException (java.io.EOFException): 6
AtomicLong (java.util.concurrent.atomic.AtomicLong): 6
Ignore (org.junit.Ignore): 6
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 5
DataOutputStream (java.io.DataOutputStream): 5
Path (java.nio.file.Path): 5
Callable (java.util.concurrent.Callable): 5