
Example 1 with ExpirationAction

use of org.apache.nifi.provenance.expiration.ExpirationAction in project nifi by apache.

In class PersistentProvenanceRepository, method purgeOldEvents:

/**
 * Purges old events from the repository
 *
 * @throws IOException if unable to purge old events due to an I/O problem
 */
synchronized void purgeOldEvents() throws IOException {
    while (!recoveryFinished.get()) {
        try {
            Thread.sleep(100L);
        } catch (final InterruptedException ie) {
            // Ignore the interrupt and keep waiting for recovery to finish.
        }
    }
    final List<File> toPurge = new ArrayList<>();
    final long timeCutoff = System.currentTimeMillis() - configuration.getMaxRecordLife(TimeUnit.MILLISECONDS);
    final List<File> sortedByBasename = getLogFiles();
    long bytesUsed = getSize(sortedByBasename, timeCutoff);
    for (final Path path : idToPathMap.get().values()) {
        final File file = path.toFile();
        final long lastModified = file.lastModified();
        if (lastModified > 0L && lastModified < timeCutoff) {
            toPurge.add(file);
        }
    }
    // This comparator sorts the data based on the "basename" of the files. I.e., the numeric portion.
    // We do this because the numeric portion represents the ID of the first event in the log file.
    // As a result, we are sorting based on time, since the ID is monotonically increasing. By doing this,
    // we are able to avoid hitting disk continually to check timestamps.
    final Comparator<File> sortByBasenameComparator = new Comparator<File>() {

        @Override
        public int compare(final File o1, final File o2) {
            final String baseName1 = LuceneUtil.substringBefore(o1.getName(), ".");
            final String baseName2 = LuceneUtil.substringBefore(o2.getName(), ".");
            Long id1 = null;
            Long id2 = null;
            try {
                id1 = Long.parseLong(baseName1);
            } catch (final NumberFormatException nfe) {
                id1 = null;
            }
            try {
                id2 = Long.parseLong(baseName2);
            } catch (final NumberFormatException nfe) {
                id2 = null;
            }
            if (id1 == null && id2 == null) {
                return 0;
            }
            if (id1 == null) {
                return 1;
            }
            if (id2 == null) {
                return -1;
            }
            return Long.compare(id1, id2);
        }
    };
    // If we have too much data (at least 90% of our max capacity), start aging it off
    if (bytesUsed > configuration.getMaxStorageCapacity() * PURGE_OLD_EVENTS_HIGH_WATER) {
        Collections.sort(sortedByBasename, sortByBasenameComparator);
        for (final File file : sortedByBasename) {
            toPurge.add(file);
            bytesUsed -= file.length();
            if (bytesUsed < configuration.getMaxStorageCapacity() * PURGE_OLD_EVENTS_LOW_WATER) {
                // we've shrunk the repo size down enough to stop
                break;
            }
        }
    }
    // Sort all of the files that we want to purge such that the oldest events are aged off first
    Collections.sort(toPurge, sortByBasenameComparator);
    logger.debug("Purging old event files: {}", toPurge);
    // Remove any duplicates that we may have.
    final Set<File> uniqueFilesToPurge = new LinkedHashSet<>(toPurge);
    // Age off the data.
    final Set<String> removed = new LinkedHashSet<>();
    for (File file : uniqueFilesToPurge) {
        final String baseName = LuceneUtil.substringBefore(file.getName(), ".");
        ExpirationAction currentAction = null;
        try {
            for (final ExpirationAction action : expirationActions) {
                currentAction = action;
                if (!action.hasBeenPerformed(file)) {
                    final File fileBeforeAction = file;
                    final StopWatch stopWatch = new StopWatch(true);
                    file = action.execute(file);
                    stopWatch.stop();
                    logger.info("Successfully performed Expiration Action {} on Provenance Event file {} in {}", action, fileBeforeAction, stopWatch.getDuration());
                }
            }
            removed.add(baseName);
        } catch (final FileNotFoundException fnf) {
            logger.warn("Failed to perform Expiration Action {} on Provenance Event file {} because the file no longer exists; will not " + "perform additional Expiration Actions on this file", currentAction, file);
            removed.add(baseName);
        } catch (final Throwable t) {
            logger.warn("Failed to perform Expiration Action {} on Provenance Event file {} due to {}; will not perform additional " + "Expiration Actions on this file at this time", currentAction, file, t.toString());
            logger.warn("", t);
            eventReporter.reportEvent(Severity.WARNING, EVENT_CATEGORY, "Failed to perform Expiration Action " + currentAction + " on Provenance Event file " + file + " due to " + t.toString() + "; will not perform additional Expiration Actions " + "on this file at this time");
        }
    }
    // Update the ID-to-path map so that it no longer includes the removed files.
    // We cannot obtain the write lock here because there may be a need for the lock in the rollover method,
    // if we have 'backpressure applied'. This would result in a deadlock because the rollover method would be
    // waiting for purgeOldEvents, and purgeOldEvents would be waiting for the write lock held by rollover.
    boolean updated = false;
    while (!updated) {
        final SortedMap<Long, Path> existingPathMap = idToPathMap.get();
        final SortedMap<Long, Path> newPathMap = new TreeMap<>(new PathMapComparator());
        newPathMap.putAll(existingPathMap);
        final Iterator<Map.Entry<Long, Path>> itr = newPathMap.entrySet().iterator();
        while (itr.hasNext()) {
            final Map.Entry<Long, Path> entry = itr.next();
            final String filename = entry.getValue().toFile().getName();
            final String baseName = LuceneUtil.substringBefore(filename, ".");
            if (removed.contains(baseName)) {
                itr.remove();
            }
        }
        updated = idToPathMap.compareAndSet(existingPathMap, newPathMap);
        logger.debug("After expiration, path map: {}", newPathMap);
    }
    purgeExpiredIndexes();
}
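
The comparator above orders files by the numeric basename rather than lexicographically. For intuition, here is a minimal, self-contained sketch of the same comparison; the file names are hypothetical and basenameId is an illustrative stand-in for the substringBefore-then-parse steps shown above:

import java.util.Arrays;
import java.util.Comparator;

public class BasenameSortDemo {

    // Illustrative stand-in for the substring-then-parse logic above:
    // extract the numeric basename before the first '.', or null if it
    // is not a number.
    static Long basenameId(final String name) {
        final int dot = name.indexOf('.');
        final String base = dot >= 0 ? name.substring(0, dot) : name;
        try {
            return Long.parseLong(base);
        } catch (final NumberFormatException nfe) {
            return null;
        }
    }

    public static void main(final String[] args) {
        final String[] names = {"120.prov", "5.prov", "30.prov"};
        // Non-numeric basenames sort last, mirroring the null handling above.
        Arrays.sort(names, Comparator.comparing(BasenameSortDemo::basenameId,
                Comparator.nullsLast(Comparator.<Long>naturalOrder())));
        // Prints [5.prov, 30.prov, 120.prov] rather than lexicographic order,
        // which would put 120.prov first.
        System.out.println(Arrays.toString(names));
    }
}

Because the first-event ID in each basename is monotonically increasing, this numeric ordering is also a time ordering, so the repository can age off the oldest files first without reading timestamps from disk.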
Also used : LinkedHashSet(java.util.LinkedHashSet) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) Comparator(java.util.Comparator) ExpirationAction(org.apache.nifi.provenance.expiration.ExpirationAction) Path(java.nio.file.Path) TreeMap(java.util.TreeMap) StopWatch(org.apache.nifi.util.StopWatch) AtomicLong(java.util.concurrent.atomic.AtomicLong) TimestampedLong(org.apache.nifi.util.timebuffer.TimestampedLong) File(java.io.File) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap)
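
The age-off loop chains expiration actions, and each action may transform the file (for example, an archival or compression step could move or rename it) before returning the File that the next action should operate on. The interface below is a simplified, hypothetical stand-in sketching that contract, not the actual org.apache.nifi.provenance.expiration.ExpirationAction:

import java.io.File;
import java.io.IOException;

public class ExpirationChainDemo {

    // Hypothetical stand-in mirroring only the two calls used in the
    // example above.
    interface SimpleExpirationAction {
        boolean hasBeenPerformed(File file) throws IOException;
        File execute(File file) throws IOException;
    }

    static File applyAll(File file, final Iterable<SimpleExpirationAction> actions) throws IOException {
        for (final SimpleExpirationAction action : actions) {
            if (!action.hasBeenPerformed(file)) {
                // Each action hands back the (possibly relocated or renamed)
                // file, which becomes the input to the next action.
                file = action.execute(file);
            }
        }
        return file;
    }
}

This is why the loop in the repository reassigns file from action.execute(file): later actions must see the file wherever the earlier actions left it.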

Example 2 with ExpirationAction

use of org.apache.nifi.provenance.expiration.ExpirationAction in project nifi-minifi by apache.

In class MiNiFiPersistentProvenanceRepository, method purgeOldEvents:

/**
 * Purges old events from the repository
 *
 * @throws IOException if unable to purge old events due to an I/O problem
 */
synchronized void purgeOldEvents() throws IOException {
    while (!recoveryFinished.get()) {
        try {
            Thread.sleep(100L);
        } catch (final InterruptedException ie) {
            // Ignore the interrupt and keep waiting for recovery to finish.
        }
    }
    final List<File> toPurge = new ArrayList<>();
    final long timeCutoff = System.currentTimeMillis() - configuration.getMaxRecordLife(TimeUnit.MILLISECONDS);
    final List<File> sortedByBasename = getLogFiles();
    long bytesUsed = getSize(sortedByBasename, timeCutoff);
    for (final Path path : idToPathMap.get().values()) {
        final File file = path.toFile();
        final long lastModified = file.lastModified();
        if (lastModified > 0L && lastModified < timeCutoff) {
            toPurge.add(file);
        }
    }
    // This comparator sorts the data based on the "basename" of the files. I.e., the numeric portion.
    // We do this because the numeric portion represents the ID of the first event in the log file.
    // As a result, we are sorting based on time, since the ID is monotonically increasing. By doing this,
    // we are able to avoid hitting disk continually to check timestamps.
    final Comparator<File> sortByBasenameComparator = new Comparator<File>() {

        @Override
        public int compare(final File o1, final File o2) {
            final String baseName1 = LuceneUtil.substringBefore(o1.getName(), ".");
            final String baseName2 = LuceneUtil.substringBefore(o2.getName(), ".");
            Long id1 = null;
            Long id2 = null;
            try {
                id1 = Long.parseLong(baseName1);
            } catch (final NumberFormatException nfe) {
                id1 = null;
            }
            try {
                id2 = Long.parseLong(baseName2);
            } catch (final NumberFormatException nfe) {
                id2 = null;
            }
            if (id1 == null && id2 == null) {
                return 0;
            }
            if (id1 == null) {
                return 1;
            }
            if (id2 == null) {
                return -1;
            }
            return Long.compare(id1, id2);
        }
    };
    // If we have too much data (at least 90% of our max capacity), start aging it off
    if (bytesUsed > configuration.getMaxStorageCapacity() * 0.9) {
        Collections.sort(sortedByBasename, sortByBasenameComparator);
        for (final File file : sortedByBasename) {
            toPurge.add(file);
            bytesUsed -= file.length();
            if (bytesUsed < configuration.getMaxStorageCapacity()) {
                // we've shrunk the repo size down enough to stop
                break;
            }
        }
    }
    // Sort all of the files that we want to purge such that the oldest events are aged off first
    Collections.sort(toPurge, sortByBasenameComparator);
    logger.debug("Purging old event files: {}", toPurge);
    // Remove any duplicates that we may have.
    final Set<File> uniqueFilesToPurge = new LinkedHashSet<>(toPurge);
    // Age off the data.
    final Set<String> removed = new LinkedHashSet<>();
    for (File file : uniqueFilesToPurge) {
        final String baseName = LuceneUtil.substringBefore(file.getName(), ".");
        ExpirationAction currentAction = null;
        try {
            for (final ExpirationAction action : expirationActions) {
                currentAction = action;
                if (!action.hasBeenPerformed(file)) {
                    final File fileBeforeAction = file;
                    final StopWatch stopWatch = new StopWatch(true);
                    file = action.execute(file);
                    stopWatch.stop();
                    logger.info("Successfully performed Expiration Action {} on Provenance Event file {} in {}", action, fileBeforeAction, stopWatch.getDuration());
                }
            }
            removed.add(baseName);
        } catch (final FileNotFoundException fnf) {
            logger.warn("Failed to perform Expiration Action {} on Provenance Event file {} because the file no longer exists; will not " + "perform additional Expiration Actions on this file", currentAction, file);
            removed.add(baseName);
        } catch (final Throwable t) {
            logger.warn("Failed to perform Expiration Action {} on Provenance Event file {} due to {}; will not perform additional " + "Expiration Actions on this file at this time", currentAction, file, t.toString());
            logger.warn("", t);
            eventReporter.reportEvent(Severity.WARNING, EVENT_CATEGORY, "Failed to perform Expiration Action " + currentAction + " on Provenance Event file " + file + " due to " + t.toString() + "; will not perform additional Expiration Actions " + "on this file at this time");
        }
    }
    // Update the ID-to-path map so that it no longer includes the removed files.
    // We cannot obtain the write lock here because there may be a need for the lock in the rollover method,
    // if we have 'backpressure applied'. This would result in a deadlock because the rollover method would be
    // waiting for purgeOldEvents, and purgeOldEvents would be waiting for the write lock held by rollover.
    boolean updated = false;
    while (!updated) {
        final SortedMap<Long, Path> existingPathMap = idToPathMap.get();
        final SortedMap<Long, Path> newPathMap = new TreeMap<>(new PathMapComparator());
        newPathMap.putAll(existingPathMap);
        final Iterator<Map.Entry<Long, Path>> itr = newPathMap.entrySet().iterator();
        while (itr.hasNext()) {
            final Map.Entry<Long, Path> entry = itr.next();
            final String filename = entry.getValue().toFile().getName();
            final String baseName = LuceneUtil.substringBefore(filename, ".");
            if (removed.contains(baseName)) {
                itr.remove();
            }
        }
        updated = idToPathMap.compareAndSet(existingPathMap, newPathMap);
        logger.debug("After expiration, path map: {}", newPathMap);
    }
    purgeExpiredIndexes();
}
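
Note a subtle difference from Example 1: the NiFi variant keeps purging until usage falls below a separate low-water constant (PURGE_OLD_EVENTS_LOW_WATER), while this MiNiFi variant stops as soon as usage drops below the full capacity. Below is a self-contained sketch of the high/low-water hysteresis; the high-water fraction matches the "at least 90%" comment above, but the low-water fraction is purely an assumption for illustration:

import java.util.ArrayList;
import java.util.List;

public class HysteresisPurgeDemo {

    // HIGH_WATER matches the "at least 90%" comment; LOW_WATER is an
    // assumed value for illustration, not the repository's actual constant.
    static final double HIGH_WATER = 0.90;
    static final double LOW_WATER = 0.80;

    // Given file sizes ordered oldest-first, return the sizes that would be
    // purged to bring usage back under the low-water mark.
    static List<Long> purge(final List<Long> sizesOldestFirst, final long capacity) {
        long used = sizesOldestFirst.stream().mapToLong(Long::longValue).sum();
        final List<Long> purged = new ArrayList<>();
        if (used > capacity * HIGH_WATER) {
            for (final long size : sizesOldestFirst) {
                purged.add(size);
                used -= size;
                if (used < capacity * LOW_WATER) {
                    break; // shrunk enough; keep the remaining files
                }
            }
        }
        return purged;
    }

    public static void main(final String[] args) {
        // 100 bytes used against a 100-byte capacity: purging the oldest file
        // (40 bytes) drops usage to 60, below the 80-byte low-water mark.
        System.out.println(purge(List.of(40L, 30L, 20L, 10L), 100L)); // [40]
    }
}

Purging down to a distinctly lower mark avoids thrashing: without the gap between the two thresholds, the repository would cross back over the high-water mark, and purge again, almost immediately.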
Also used : LinkedHashSet(java.util.LinkedHashSet) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) Comparator(java.util.Comparator) ExpirationAction(org.apache.nifi.provenance.expiration.ExpirationAction) Path(java.nio.file.Path) TreeMap(java.util.TreeMap) StopWatch(org.apache.nifi.util.StopWatch) TimestampedLong(org.apache.nifi.util.timebuffer.TimestampedLong) AtomicLong(java.util.concurrent.atomic.AtomicLong) File(java.io.File) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap)
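
Both examples end with the same lock-free update: copy the current ID-to-path map, prune the copy, and publish it with compareAndSet, retrying if another thread swapped the reference in the meantime. Here is a minimal sketch of that copy-then-CAS idiom, with hypothetical field and value types (String stands in for Path for brevity), not the repository's actual ones:

import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicReference;

public class CasMapUpdateDemo {

    // Hypothetical stand-in for the repository's idToPathMap field.
    private final AtomicReference<SortedMap<Long, String>> idToPathMap =
            new AtomicReference<>(new TreeMap<>());

    void removeAll(final Set<Long> idsToRemove) {
        boolean updated = false;
        while (!updated) {
            final SortedMap<Long, String> existing = idToPathMap.get();
            // Mutate a private copy only; the shared map is never touched.
            final SortedMap<Long, String> replacement = new TreeMap<>(existing);
            replacement.keySet().removeAll(idsToRemove);
            // Publish atomically. If another thread replaced the map since we
            // read it, the CAS fails and the loop retries on a fresh snapshot.
            updated = idToPathMap.compareAndSet(existing, replacement);
        }
    }
}

Because readers only ever see a fully built map and this writer never blocks, the pattern sidesteps the rollover deadlock described in the comment above the loop.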

Aggregations

File (java.io.File)2 FileNotFoundException (java.io.FileNotFoundException)2 Path (java.nio.file.Path)2 ArrayList (java.util.ArrayList)2 Comparator (java.util.Comparator)2 HashMap (java.util.HashMap)2 LinkedHashSet (java.util.LinkedHashSet)2 Map (java.util.Map)2 SortedMap (java.util.SortedMap)2 TreeMap (java.util.TreeMap)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 ConcurrentMap (java.util.concurrent.ConcurrentMap)2 AtomicLong (java.util.concurrent.atomic.AtomicLong)2 ExpirationAction (org.apache.nifi.provenance.expiration.ExpirationAction)2 StopWatch (org.apache.nifi.util.StopWatch)2 TimestampedLong (org.apache.nifi.util.timebuffer.TimestampedLong)2