
Example 71 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi by apache.

From the class PutHDFS, method onTrigger:

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final FileSystem hdfs = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();
    if (configuration == null || hdfs == null || ugi == null) {
        getLogger().error("HDFS not configured properly");
        session.transfer(flowFile, REL_FAILURE);
        context.yield();
        return;
    }
    ugi.doAs(new PrivilegedAction<Object>() {

        @Override
        public Object run() {
            Path tempDotCopyFile = null;
            FlowFile putFlowFile = flowFile;
            try {
                final String dirValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile).getValue();
                final Path configuredRootDirPath = new Path(dirValue);
                final String conflictResponse = context.getProperty(CONFLICT_RESOLUTION).getValue();
                final Double blockSizeProp = context.getProperty(BLOCK_SIZE).asDataSize(DataUnit.B);
                final long blockSize = blockSizeProp != null ? blockSizeProp.longValue() : hdfs.getDefaultBlockSize(configuredRootDirPath);
                final Double bufferSizeProp = context.getProperty(BUFFER_SIZE).asDataSize(DataUnit.B);
                final int bufferSize = bufferSizeProp != null ? bufferSizeProp.intValue() : configuration.getInt(BUFFER_SIZE_KEY, BUFFER_SIZE_DEFAULT);
                final Integer replicationProp = context.getProperty(REPLICATION_FACTOR).asInteger();
                final short replication = replicationProp != null ? replicationProp.shortValue() : hdfs.getDefaultReplication(configuredRootDirPath);
                final CompressionCodec codec = getCompressionCodec(context, configuration);
                final String filename = codec != null ? putFlowFile.getAttribute(CoreAttributes.FILENAME.key()) + codec.getDefaultExtension() : putFlowFile.getAttribute(CoreAttributes.FILENAME.key());
                final Path tempCopyFile = new Path(configuredRootDirPath, "." + filename);
                final Path copyFile = new Path(configuredRootDirPath, filename);
                // Create destination directory if it does not exist
                try {
                    if (!hdfs.getFileStatus(configuredRootDirPath).isDirectory()) {
                        throw new IOException(configuredRootDirPath.toString() + " already exists and is not a directory");
                    }
                } catch (FileNotFoundException fe) {
                    if (!hdfs.mkdirs(configuredRootDirPath)) {
                        throw new IOException(configuredRootDirPath.toString() + " could not be created");
                    }
                    changeOwner(context, hdfs, configuredRootDirPath, flowFile);
                }
                final boolean destinationExists = hdfs.exists(copyFile);
                // If destination file already exists, resolve that based on processor configuration
                if (destinationExists) {
                    switch(conflictResponse) {
                        case REPLACE_RESOLUTION:
                            if (hdfs.delete(copyFile, false)) {
                                getLogger().info("deleted {} in order to replace with the contents of {}", new Object[] { copyFile, putFlowFile });
                            }
                            break;
                        case IGNORE_RESOLUTION:
                            session.transfer(putFlowFile, REL_SUCCESS);
                            getLogger().info("transferring {} to success because file with same name already exists", new Object[] { putFlowFile });
                            return null;
                        case FAIL_RESOLUTION:
                            session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                            getLogger().warn("penalizing {} and routing to failure because file with same name already exists", new Object[] { putFlowFile });
                            return null;
                        default:
                            break;
                    }
                }
                // Write FlowFile to temp file on HDFS
                final StopWatch stopWatch = new StopWatch(true);
                session.read(putFlowFile, new InputStreamCallback() {

                    @Override
                    public void process(InputStream in) throws IOException {
                        OutputStream fos = null;
                        Path createdFile = null;
                        try {
                            if (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && destinationExists) {
                                fos = hdfs.append(copyFile, bufferSize);
                            } else {
                                fos = hdfs.create(tempCopyFile, true, bufferSize, replication, blockSize);
                            }
                            if (codec != null) {
                                fos = codec.createOutputStream(fos);
                            }
                            createdFile = tempCopyFile;
                            BufferedInputStream bis = new BufferedInputStream(in);
                            StreamUtils.copy(bis, fos);
                            bis = null;
                            fos.flush();
                        } finally {
                            try {
                                if (fos != null) {
                                    fos.close();
                                }
                            } catch (RemoteException re) {
                                // when talking to remote HDFS clusters, we don't notice problems until fos.close()
                                if (createdFile != null) {
                                    try {
                                        hdfs.delete(createdFile, false);
                                    } catch (Throwable ignore) {
                                    }
                                }
                                throw re;
                            } catch (Throwable ignore) {
                            }
                            fos = null;
                        }
                    }
                });
                stopWatch.stop();
                final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
                final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
                tempDotCopyFile = tempCopyFile;
                if (!conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) || (conflictResponse.equals(APPEND_RESOLUTION_AV.getValue()) && !destinationExists)) {
                    boolean renamed = false;
                    for (int i = 0; i < 10; i++) {
                        // try to rename multiple times.
                        if (hdfs.rename(tempCopyFile, copyFile)) {
                            renamed = true;
                            // rename was successful
                            break;
                        }
                        // try waiting to let whatever might cause rename failure to resolve
                        Thread.sleep(200L);
                    }
                    if (!renamed) {
                        hdfs.delete(tempCopyFile, false);
                        throw new ProcessException("Copied file to HDFS but could not rename dot file " + tempCopyFile + " to its final filename");
                    }
                    changeOwner(context, hdfs, copyFile, flowFile);
                }
                getLogger().info("copied {} to HDFS at {} in {} milliseconds at a rate of {}", new Object[] { putFlowFile, copyFile, millis, dataRate });
                final String newFilename = copyFile.getName();
                final String hdfsPath = copyFile.getParent().toString();
                putFlowFile = session.putAttribute(putFlowFile, CoreAttributes.FILENAME.key(), newFilename);
                putFlowFile = session.putAttribute(putFlowFile, ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
                final Path qualifiedPath = copyFile.makeQualified(hdfs.getUri(), hdfs.getWorkingDirectory());
                session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());
                session.transfer(putFlowFile, REL_SUCCESS);
            } catch (final Throwable t) {
                if (tempDotCopyFile != null) {
                    try {
                        hdfs.delete(tempDotCopyFile, false);
                    } catch (Exception e) {
                        getLogger().error("Unable to remove temporary file {} due to {}", new Object[] { tempDotCopyFile, e });
                    }
                }
                getLogger().error("Failed to write to HDFS due to {}", new Object[] { t });
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                context.yield();
            }
            return null;
        }
    });
}
Also used: Path (org.apache.hadoop.fs.Path), FlowFile (org.apache.nifi.flowfile.FlowFile), Configuration (org.apache.hadoop.conf.Configuration), BufferedInputStream (java.io.BufferedInputStream), InputStream (java.io.InputStream), OutputStream (java.io.OutputStream), FileNotFoundException (java.io.FileNotFoundException), IOException (java.io.IOException), ProcessException (org.apache.nifi.processor.exception.ProcessException), RemoteException (org.apache.hadoop.ipc.RemoteException), StopWatch (org.apache.nifi.util.StopWatch), FileSystem (org.apache.hadoop.fs.FileSystem), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec), UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)
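
In this example the StopWatch brackets only the HDFS write: it is started just before session.read(...), stopped immediately after, and then used for both the elapsed time and the throughput reported in the log and provenance messages. Below is a minimal, self-contained sketch of that timing pattern outside of any processor; the byte count, sleep, and println are stand-ins for the real streaming copy and ComponentLog call, not part of the PutHDFS code.

import java.util.concurrent.TimeUnit;

import org.apache.nifi.util.StopWatch;

public class StopWatchDataRateSketch {

    public static void main(final String[] args) throws InterruptedException {
        // Hypothetical payload size; in PutHDFS this would be putFlowFile.getSize().
        final long bytesTransferred = 4L * 1024 * 1024;

        // Passing true starts the watch immediately, as PutHDFS does.
        final StopWatch stopWatch = new StopWatch(true);
        // Stand-in for the actual streaming copy to HDFS.
        Thread.sleep(250L);
        stopWatch.stop();

        // calculateDataRate(bytes) formats a human-readable throughput string,
        // while getDuration(TimeUnit) returns the elapsed time as a number.
        final String dataRate = stopWatch.calculateDataRate(bytesTransferred);
        final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
        System.out.println("copied " + bytesTransferred + " bytes in " + millis + " ms at a rate of " + dataRate);
    }
}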

Example 72 with StopWatch

Use of org.apache.nifi.util.StopWatch in project nifi-minifi by apache.

From the class MiNiFiPersistentProvenanceRepository, method purgeOldEvents:

/**
 * Purges old events from the repository
 *
 * @throws IOException if unable to purge old events due to an I/O problem
 */
synchronized void purgeOldEvents() throws IOException {
    while (!recoveryFinished.get()) {
        try {
            Thread.sleep(100L);
        } catch (final InterruptedException ie) {
        }
    }
    final List<File> toPurge = new ArrayList<>();
    final long timeCutoff = System.currentTimeMillis() - configuration.getMaxRecordLife(TimeUnit.MILLISECONDS);
    final List<File> sortedByBasename = getLogFiles();
    long bytesUsed = getSize(sortedByBasename, timeCutoff);
    for (final Path path : idToPathMap.get().values()) {
        final File file = path.toFile();
        final long lastModified = file.lastModified();
        if (lastModified > 0L && lastModified < timeCutoff) {
            toPurge.add(file);
        }
    }
    // This comparator sorts the data based on the "basename" of the files. I.e., the numeric portion.
    // We do this because the numeric portion represents the ID of the first event in the log file.
    // As a result, we are sorting based on time, since the ID is monotonically increasing. By doing this, we
    // are able to avoid hitting disk continually to check timestamps.
    final Comparator<File> sortByBasenameComparator = new Comparator<File>() {

        @Override
        public int compare(final File o1, final File o2) {
            final String baseName1 = LuceneUtil.substringBefore(o1.getName(), ".");
            final String baseName2 = LuceneUtil.substringBefore(o2.getName(), ".");
            Long id1 = null;
            Long id2 = null;
            try {
                id1 = Long.parseLong(baseName1);
            } catch (final NumberFormatException nfe) {
                id1 = null;
            }
            try {
                id2 = Long.parseLong(baseName2);
            } catch (final NumberFormatException nfe) {
                id2 = null;
            }
            if (id1 == null && id2 == null) {
                return 0;
            }
            if (id1 == null) {
                return 1;
            }
            if (id2 == null) {
                return -1;
            }
            return Long.compare(id1, id2);
        }
    };
    // If we have too much data (at least 90% of our max capacity), start aging it off
    if (bytesUsed > configuration.getMaxStorageCapacity() * 0.9) {
        Collections.sort(sortedByBasename, sortByBasenameComparator);
        for (final File file : sortedByBasename) {
            toPurge.add(file);
            bytesUsed -= file.length();
            if (bytesUsed < configuration.getMaxStorageCapacity()) {
                // we've shrunk the repo size down enough to stop
                break;
            }
        }
    }
    // Sort all of the files that we want to purge such that the oldest events are aged off first
    Collections.sort(toPurge, sortByBasenameComparator);
    logger.debug("Purging old event files: {}", toPurge);
    // Remove any duplicates that we may have.
    final Set<File> uniqueFilesToPurge = new LinkedHashSet<>(toPurge);
    // Age off the data.
    final Set<String> removed = new LinkedHashSet<>();
    for (File file : uniqueFilesToPurge) {
        final String baseName = LuceneUtil.substringBefore(file.getName(), ".");
        ExpirationAction currentAction = null;
        try {
            for (final ExpirationAction action : expirationActions) {
                currentAction = action;
                if (!action.hasBeenPerformed(file)) {
                    final File fileBeforeAction = file;
                    final StopWatch stopWatch = new StopWatch(true);
                    file = action.execute(file);
                    stopWatch.stop();
                    logger.info("Successfully performed Expiration Action {} on Provenance Event file {} in {}", action, fileBeforeAction, stopWatch.getDuration());
                }
            }
            removed.add(baseName);
        } catch (final FileNotFoundException fnf) {
            logger.warn("Failed to perform Expiration Action {} on Provenance Event file {} because the file no longer exists; will not " + "perform additional Expiration Actions on this file", currentAction, file);
            removed.add(baseName);
        } catch (final Throwable t) {
            logger.warn("Failed to perform Expiration Action {} on Provenance Event file {} due to {}; will not perform additional " + "Expiration Actions on this file at this time", currentAction, file, t.toString());
            logger.warn("", t);
            eventReporter.reportEvent(Severity.WARNING, EVENT_CATEGORY, "Failed to perform Expiration Action " + currentAction + " on Provenance Event file " + file + " due to " + t.toString() + "; will not perform additional Expiration Actions " + "on this file at this time");
        }
    }
    // Update the Map ID to Path map to not include the removed file
    // We cannot obtain the write lock here because there may be a need for the lock in the rollover method,
    // if we have 'backpressure applied'. This would result in a deadlock because the rollover method would be
    // waiting for purgeOldEvents, and purgeOldEvents would be waiting for the write lock held by rollover.
    boolean updated = false;
    while (!updated) {
        final SortedMap<Long, Path> existingPathMap = idToPathMap.get();
        final SortedMap<Long, Path> newPathMap = new TreeMap<>(new PathMapComparator());
        newPathMap.putAll(existingPathMap);
        final Iterator<Map.Entry<Long, Path>> itr = newPathMap.entrySet().iterator();
        while (itr.hasNext()) {
            final Map.Entry<Long, Path> entry = itr.next();
            final String filename = entry.getValue().toFile().getName();
            final String baseName = LuceneUtil.substringBefore(filename, ".");
            if (removed.contains(baseName)) {
                itr.remove();
            }
        }
        updated = idToPathMap.compareAndSet(existingPathMap, newPathMap);
        logger.debug("After expiration, path map: {}", newPathMap);
    }
    purgeExpiredIndexes();
}
Also used: LinkedHashSet (java.util.LinkedHashSet), ArrayList (java.util.ArrayList), FileNotFoundException (java.io.FileNotFoundException), Comparator (java.util.Comparator), ExpirationAction (org.apache.nifi.provenance.expiration.ExpirationAction), Path (java.nio.file.Path), TreeMap (java.util.TreeMap), StopWatch (org.apache.nifi.util.StopWatch), TimestampedLong (org.apache.nifi.util.timebuffer.TimestampedLong), AtomicLong (java.util.concurrent.atomic.AtomicLong), File (java.io.File), Map (java.util.Map), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), SortedMap (java.util.SortedMap), HashMap (java.util.HashMap), ConcurrentMap (java.util.concurrent.ConcurrentMap)
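
Here the StopWatch only measures how long each ExpirationAction takes so that the duration can be logged; the no-argument getDuration() returns a formatted string rather than a raw count. Below is a minimal sketch of that pattern with a stand-in task; the Runnable and the SLF4J logger are illustrative and not part of the repository code.

import org.apache.nifi.util.StopWatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class StopWatchLoggingSketch {

    private static final Logger logger = LoggerFactory.getLogger(StopWatchLoggingSketch.class);

    public static void main(final String[] args) {
        // Hypothetical unit of work standing in for action.execute(file).
        final Runnable expirationAction = () -> {
            try {
                Thread.sleep(150L);
            } catch (final InterruptedException ie) {
                Thread.currentThread().interrupt();
            }
        };

        // Passing true starts the watch immediately.
        final StopWatch stopWatch = new StopWatch(true);
        expirationAction.run();
        stopWatch.stop();

        // getDuration() with no arguments returns a formatted duration string,
        // which is what the repository logs after each expiration action.
        logger.info("Successfully performed expiration action in {}", stopWatch.getDuration());
    }
}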

Aggregations

StopWatch (org.apache.nifi.util.StopWatch): 72
FlowFile (org.apache.nifi.flowfile.FlowFile): 59
IOException (java.io.IOException): 41
ProcessException (org.apache.nifi.processor.exception.ProcessException): 37
InputStream (java.io.InputStream): 27
ComponentLog (org.apache.nifi.logging.ComponentLog): 27
OutputStream (java.io.OutputStream): 21
HashMap (java.util.HashMap): 16
ArrayList (java.util.ArrayList): 13
Map (java.util.Map): 11
ProcessSession (org.apache.nifi.processor.ProcessSession): 11
AtomicLong (java.util.concurrent.atomic.AtomicLong): 10
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 10
StreamCallback (org.apache.nifi.processor.io.StreamCallback): 10
HashSet (java.util.HashSet): 9
Path (org.apache.hadoop.fs.Path): 9
Charset (java.nio.charset.Charset): 8
AtomicReference (java.util.concurrent.atomic.AtomicReference): 8
FileSystem (org.apache.hadoop.fs.FileSystem): 8
PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor): 8