
Example 1 with DeleteIndexAction

Use of org.apache.nifi.provenance.lucene.DeleteIndexAction in project nifi by apache.

From the class PersistentProvenanceRepository, the method mergeJournals, shown in full below.
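Before the full listing, here is the DeleteIndexAction usage in isolation. This is a minimal sketch condensed from the method body below, not standalone API: this, indexConfig, getIndexManager(), logger, and suggestedMergeFile are all members or locals of PersistentProvenanceRepository.

// Re-merge safety: if a merged file already exists alongside its partial journals,
// delete its previously indexed events before merging again.
final DeleteIndexAction deleteAction = new DeleteIndexAction(this, indexConfig, getIndexManager());
try {
    deleteAction.execute(suggestedMergeFile);
} catch (final Exception e) {
    // A failure here may leave duplicate events in the index; the merge still proceeds.
    logger.warn("Failed to delete records from {} from the index", suggestedMergeFile, e);
}

The full method: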

/**
 * <p>
 * Merges all of the given Journal Files into a single, merged Provenance
 * Event Log File. As the records are merged, they will be indexed and, if
 * the repository is configured to compress records, compressed.
 * </p>
 *
 * <p>
 * If the repository is configured to compress the data, the file written to
 * may not be the same as the <code>suggestedMergeFile</code>, as a filename
 * extension of '.gz' may be appended. If the journals are successfully
 * merged, the file that they were merged into will be returned. If unable
 * to merge the records (for instance, because the repository has been
 * closed or because the list of journal files was empty), this method will
 * return <code>null</code>.
 * </p>
 *
 * @param journalFiles the journal files to merge
 * @param suggestedMergeFile the file to write the merged records to
 * @param eventReporter the event reporter to report any warnings or errors
 * to; may be null.
 *
 * @return the file that the given journals were merged into, or
 * <code>null</code> if no records were merged.
 *
 * @throws IOException if a problem occurs writing to the mergedFile,
 * reading from a journal, or updating the Lucene Index.
 */
File mergeJournals(final List<File> journalFiles, final File suggestedMergeFile, final EventReporter eventReporter) throws IOException {
    logger.debug("Merging {} to {}", journalFiles, suggestedMergeFile);
    if (this.closed.get()) {
        logger.info("Provenance Repository has been closed; will not merge journal files to {}", suggestedMergeFile);
        return null;
    }
    if (journalFiles.isEmpty()) {
        logger.debug("Couldn't merge journals: Journal Files is empty; won't merge journals");
        return null;
    }
    Collections.sort(journalFiles, new Comparator<File>() {

        @Override
        public int compare(final File o1, final File o2) {
            final String suffix1 = LuceneUtil.substringAfterLast(o1.getName(), ".");
            final String suffix2 = LuceneUtil.substringAfterLast(o2.getName(), ".");
            try {
                final int journalIndex1 = Integer.parseInt(suffix1);
                final int journalIndex2 = Integer.parseInt(suffix2);
                return Integer.compare(journalIndex1, journalIndex2);
            } catch (final NumberFormatException nfe) {
                return o1.getName().compareTo(o2.getName());
            }
        }
    });
    // Search for any missing files. At this point, they should have been written to disk; otherwise, we cannot continue.
    // Missing files are most likely due to incomplete cleanup of files post-merge.
    final List<File> availableFiles = filterUnavailableFiles(journalFiles);
    final int numAvailableFiles = availableFiles.size();
    // Check whether we have all of the "partial" files for the journal.
    if (numAvailableFiles > 0) {
        if (suggestedMergeFile.exists()) {
            // we have all "partial" files and there is already a merged file. Delete the data from the index
            // because the merge file may not be fully merged. We will re-merge.
            logger.warn("Merged Journal File {} already exists; however, all partial journal files also exist " + "so assuming that the merge did not finish. Repeating procedure in order to ensure consistency.");
            final DeleteIndexAction deleteAction = new DeleteIndexAction(this, indexConfig, getIndexManager());
            try {
                deleteAction.execute(suggestedMergeFile);
            } catch (final Exception e) {
                logger.warn("Failed to delete records from Journal File {} from the index; this could potentially result in duplicates. Failure was due to {}", suggestedMergeFile, e.toString());
                if (logger.isDebugEnabled()) {
                    logger.warn("", e);
                }
            }
            // We must delete both the partially merged file and its TOC file. Otherwise, we could get the wrong copy and have issues retrieving events.
            if (!suggestedMergeFile.delete()) {
                logger.error("Failed to delete partially written Provenance Journal File {}. This may result in events from this journal " + "file not being able to be displayed. This file should be deleted manually.", suggestedMergeFile);
            }
            final File tocFile = TocUtil.getTocFile(suggestedMergeFile);
            if (tocFile.exists() && !tocFile.delete()) {
                logger.error("Failed to delete .toc file {}; this may result in not being able to read the Provenance Events from the {} Journal File. " + "This can be corrected by manually deleting the {} file", tocFile, suggestedMergeFile, tocFile);
            }
        }
    } else {
        logger.warn("Cannot merge journal files {} because they do not exist on disk", journalFiles);
        return null;
    }
    final long startNanos = System.nanoTime();
    // Map each journal to a RecordReader
    final List<RecordReader> readers = new ArrayList<>();
    int records = 0;
    final boolean isCompress = configuration.isCompressOnRollover();
    final File writerFile = isCompress ? new File(suggestedMergeFile.getParentFile(), suggestedMergeFile.getName() + ".gz") : suggestedMergeFile;
    try {
        for (final File journalFile : availableFiles) {
            try {
                // Use MAX_VALUE for number of chars because we don't want to truncate the value as we write it
                // out. This allows us to later decide that we want more characters and still be able to retrieve
                // the entire event.
                readers.add(RecordReaders.newRecordReader(journalFile, null, Integer.MAX_VALUE));
            } catch (final EOFException eof) {
                // There's nothing here; skip over it.
            } catch (final IOException ioe) {
                logger.warn("Unable to merge {} with other Journal Files due to {}", journalFile, ioe.toString());
                if (logger.isDebugEnabled()) {
                    logger.warn("", ioe);
                }
                if (eventReporter != null) {
                    eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to merge Journal Files due to " + ioe.toString());
                }
            }
        }
        // Create a Map so that the key is the next record available from a reader and the value is the Reader from which
        // the record came. This sorted map is then used so that we are able to always get the first entry, which is the next
        // lowest record id
        final SortedMap<StandardProvenanceEventRecord, RecordReader> recordToReaderMap = new TreeMap<>(new Comparator<StandardProvenanceEventRecord>() {

            @Override
            public int compare(final StandardProvenanceEventRecord o1, final StandardProvenanceEventRecord o2) {
                return Long.compare(o1.getEventId(), o2.getEventId());
            }
        });
        long minEventId = 0L;
        long earliestTimestamp = System.currentTimeMillis();
        for (final RecordReader reader : readers) {
            StandardProvenanceEventRecord record = null;
            try {
                record = reader.nextRecord();
            } catch (final EOFException eof) {
                // record will be null and the reader can no longer be used
            } catch (final Exception e) {
                logger.warn("Failed to generate Provenance Event Record from Journal due to " + e + "; it's " + "possible that the record wasn't completely written to the file. This journal will be " + "skipped.");
                if (logger.isDebugEnabled()) {
                    logger.warn("", e);
                }
                if (eventReporter != null) {
                    eventReporter.reportEvent(Severity.WARNING, EVENT_CATEGORY, "Failed to read Provenance Event " + "Record from Journal due to " + e + "; it's possible that the record wasn't " + "completely written to the file. This journal will be skipped.");
                }
            }
            if (record == null) {
                continue;
            }
            if (record.getEventTime() < earliestTimestamp) {
                earliestTimestamp = record.getEventTime();
            }
            if (record.getEventId() < minEventId) {
                minEventId = record.getEventId();
            }
            recordToReaderMap.put(record, reader);
        }
        // We want to keep track of the last 1000 events in the files so that we can add them to 'ringBuffer'.
        // However, we don't want to add them directly to ringBuffer, because once they are added to ringBuffer, they are
        // available in query results. As a result, we can have the issue where we've not finished indexing the file
        // but we try to create the lineage for events in that file. In order to avoid this, we will add the records
        // to a temporary RingBuffer and, after we finish merging the records, copy the data to the
        // repository's member-level latestRecords buffer.
        final RingBuffer<ProvenanceEventRecord> latestRecords = new RingBuffer<>(1000);
        // Loop over the sorted map, writing the lowest-id record to the merged file and re-populating the map
        // with the next entry from the journal file from which the previous record was written.
        try (final RecordWriter writer = RecordWriters.newSchemaRecordWriter(writerFile, idGenerator, configuration.isCompressOnRollover(), true)) {
            writer.writeHeader(minEventId);
            final IndexingAction indexingAction = createIndexingAction();
            final File indexingDirectory = indexConfig.getWritableIndexDirectory(writerFile, earliestTimestamp);
            long maxId = 0L;
            final BlockingQueue<Tuple<StandardProvenanceEventRecord, Integer>> eventQueue = new LinkedBlockingQueue<>(100);
            final AtomicBoolean finishedAdding = new AtomicBoolean(false);
            final List<Future<?>> futures = new ArrayList<>();
            final EventIndexWriter indexWriter = getIndexManager().borrowIndexWriter(indexingDirectory);
            try {
                final ExecutorService exec = Executors.newFixedThreadPool(configuration.getIndexThreadPoolSize(), new ThreadFactory() {

                    @Override
                    public Thread newThread(final Runnable r) {
                        final Thread t = Executors.defaultThreadFactory().newThread(r);
                        t.setName("Index Provenance Events");
                        return t;
                    }
                });
                final AtomicInteger indexingFailureCount = new AtomicInteger(0);
                try {
                    for (int i = 0; i < configuration.getIndexThreadPoolSize(); i++) {
                        final Callable<Object> callable = new Callable<Object>() {

                            @Override
                            public Object call() throws IOException {
                                while (!eventQueue.isEmpty() || !finishedAdding.get()) {
                                    try {
                                        final Tuple<StandardProvenanceEventRecord, Integer> tuple;
                                        try {
                                            tuple = eventQueue.poll(10, TimeUnit.MILLISECONDS);
                                        } catch (final InterruptedException ie) {
                                            Thread.currentThread().interrupt();
                                            continue;
                                        }
                                        if (tuple == null) {
                                            continue;
                                        }
                                        indexingAction.index(tuple.getKey(), indexWriter.getIndexWriter(), tuple.getValue());
                                    } catch (final Throwable t) {
                                        logger.error("Failed to index Provenance Event for " + writerFile + " to " + indexingDirectory, t);
                                        if (indexingFailureCount.incrementAndGet() >= MAX_INDEXING_FAILURE_COUNT) {
                                            return null;
                                        }
                                    }
                                }
                                return null;
                            }
                        };
                        final Future<?> future = exec.submit(callable);
                        futures.add(future);
                    }
                    boolean indexEvents = true;
                    while (!recordToReaderMap.isEmpty()) {
                        final StandardProvenanceEventRecord record = recordToReaderMap.firstKey();
                        final RecordReader reader = recordToReaderMap.get(record);
                        writer.writeRecord(record);
                        final int blockIndex = writer.getTocWriter().getCurrentBlockIndex();
                        boolean accepted = false;
                        while (!accepted && indexEvents) {
                            try {
                                accepted = eventQueue.offer(new Tuple<>(record, blockIndex), 10, TimeUnit.MILLISECONDS);
                            } catch (final InterruptedException ie) {
                                Thread.currentThread().interrupt();
                            }
                            // If all of the indexing threads have failed, nothing will ever drain the queue.
                            // So, if the queue is filled, we will check whether this is the case.
                            if (!accepted && indexingFailureCount.get() >= MAX_INDEXING_FAILURE_COUNT) {
                                // don't add anything else to the queue.
                                indexEvents = false;
                                eventQueue.clear();
                                final String warning = String.format("Indexing Provenance Events for %s has failed %s times. This exceeds the maximum threshold of %s failures, " + "so no more Provenance Events will be indexed for this Provenance file.", writerFile, indexingFailureCount.get(), MAX_INDEXING_FAILURE_COUNT);
                                logger.warn(warning);
                                if (eventReporter != null) {
                                    eventReporter.reportEvent(Severity.WARNING, EVENT_CATEGORY, warning);
                                }
                            }
                        }
                        maxId = record.getEventId();
                        latestRecords.add(truncateAttributes(record));
                        records++;
                        // Remove this entry from the map
                        recordToReaderMap.remove(record);
                        // Get the next entry from this reader and add it to the map
                        StandardProvenanceEventRecord nextRecord = null;
                        try {
                            nextRecord = reader.nextRecord();
                        } catch (final EOFException eof) {
                            // record will be null and the reader can no longer be used
                        } catch (final Exception e) {
                            logger.warn("Failed to generate Provenance Event Record from Journal due to " + e + "; it's possible that the record wasn't completely written to the file. " + "The remainder of this journal will be skipped.");
                            if (logger.isDebugEnabled()) {
                                logger.warn("", e);
                            }
                            if (eventReporter != null) {
                                eventReporter.reportEvent(Severity.WARNING, EVENT_CATEGORY, "Failed to read " + "Provenance Event Record from Journal due to " + e + "; it's possible " + "that the record wasn't completely written to the file. The remainder " + "of this journal will be skipped.");
                            }
                        }
                        if (nextRecord != null) {
                            recordToReaderMap.put(nextRecord, reader);
                        }
                    }
                } finally {
                    finishedAdding.set(true);
                    exec.shutdown();
                }
                for (final Future<?> future : futures) {
                    try {
                        future.get();
                    } catch (final ExecutionException ee) {
                        final Throwable t = ee.getCause();
                        if (t instanceof RuntimeException) {
                            throw (RuntimeException) t;
                        }
                        throw new RuntimeException(t);
                    } catch (final InterruptedException e) {
                        Thread.currentThread().interrupt();
                        throw new RuntimeException("Thread interrupted");
                    }
                }
            } finally {
                getIndexManager().returnIndexWriter(indexWriter);
            }
            indexConfig.setMaxIdIndexed(maxId);
        }
        // Merging and indexing are complete, so every record should now be available in the repository.
        // We can copy the values from the temporary latestRecords buffer into the repository's buffer.
        final RingBuffer<ProvenanceEventRecord> latestRecordBuffer = this.latestRecords;
        latestRecords.forEach(new ForEachEvaluator<ProvenanceEventRecord>() {

            @Override
            public boolean evaluate(final ProvenanceEventRecord event) {
                latestRecordBuffer.add(event);
                return true;
            }
        });
    } finally {
        for (final RecordReader reader : readers) {
            try {
                reader.close();
            } catch (final IOException ioe) {
                // The journals have already been merged; failing to close a reader is not fatal.
            }
        }
    }
    // Success. Remove all of the journal files, as they're no longer needed, now that they've been merged.
    for (final File journalFile : availableFiles) {
        if (!journalFile.delete() && journalFile.exists()) {
            logger.warn("Failed to remove temporary journal file {}; this file should be cleaned up manually", journalFile.getAbsolutePath());
            if (eventReporter != null) {
                eventReporter.reportEvent(Severity.WARNING, EVENT_CATEGORY, "Failed to remove temporary journal file " + journalFile.getAbsolutePath() + "; this file should be cleaned up manually");
            }
        }
        final File tocFile = TocUtil.getTocFile(journalFile);
        if (!tocFile.delete() && tocFile.exists()) {
            logger.warn("Failed to remove temporary journal TOC file {}; this file should be cleaned up manually", tocFile.getAbsolutePath());
            if (eventReporter != null) {
                eventReporter.reportEvent(Severity.WARNING, EVENT_CATEGORY, "Failed to remove temporary journal TOC file " + tocFile.getAbsolutePath() + "; this file should be cleaned up manually");
            }
        }
    }
    if (records == 0) {
        writerFile.delete();
        logger.debug("Couldn't merge journals: No Records to merge");
        return null;
    } else {
        final long nanos = System.nanoTime() - startNanos;
        final long millis = TimeUnit.MILLISECONDS.convert(nanos, TimeUnit.NANOSECONDS);
        logger.info("Successfully merged {} journal files ({} records) into single Provenance Log File {} in {} milliseconds", numAvailableFiles, records, suggestedMergeFile, millis);
    }
    return writerFile;
}
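A hedged sketch of how a caller might drive this method, exercising the contract described in the Javadoc above. The repository and eventReporter references and the journal file names are hypothetical; the sort above only requires that each journal name end in a numeric suffix after its last '.'.

// Hypothetical invocation; the paths, names, and 'repository' reference are illustrative only.
final List<File> journals = new ArrayList<>();
journals.add(new File("provenance_repository/journals/17.journal.0"));
journals.add(new File("provenance_repository/journals/17.journal.1"));

final File suggestedMergeFile = new File("provenance_repository/17.prov");
final File merged = repository.mergeJournals(journals, suggestedMergeFile, eventReporter);

if (merged == null) {
    // The repository was closed, the journal files were missing, or no records were merged.
} else if (!merged.equals(suggestedMergeFile)) {
    // Compression was enabled, so a ".gz" extension was appended to the suggested name.
}

On success, the partial journal files and their TOC files have already been deleted by the method itself, so the caller has no cleanup to do.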