Search in sources :

Example 31 with StopWatch

use of org.apache.nifi.util.StopWatch in project nifi by apache.

the class SequenceFileWriterImpl method writeSequenceFile.

/**
 * Rewrites the content of {@code flowFile} via {@code session.write}, creating a
 * {@code SequenceFile.Writer} (Text keys, InputStreamWritable values) and handing the
 * callback's input stream, the FlowFile and the writer to {@code processInputStream}.
 * The callback's output stream is wrapped in a ByteFilteringOutputStream so that the
 * InputStreamWritable class name (and its length byte) is replaced once with the
 * BytesWritable equivalent.
 *
 * NOTE(review): this excerpt appears to have lost closing braces and some statements
 * during extraction; compare against the upstream source before relying on it.
 *
 * @param flowFile the FlowFile whose content is read and rewritten
 * @param session the session used to write the FlowFile
 * @param configuration passed to {@code SequenceFile.createWriter}
 * @param compressionType passed to {@code SequenceFile.Writer.compression}
 * @param compressionCodec passed to {@code SequenceFile.Writer.compression}
 * @return the FlowFile returned by {@code session.write}
 * @throws IllegalArgumentException if the FlowFile size exceeds {@code Integer.MAX_VALUE}
 */
public FlowFile writeSequenceFile(final FlowFile flowFile, final ProcessSession session, final Configuration configuration, final CompressionType compressionType, final CompressionCodec compressionCodec) {
    if (flowFile.getSize() > Integer.MAX_VALUE) {
        // NOTE(review): flowFile is concatenated directly with "to Sequence File", so the message has no space between them.
        throw new IllegalArgumentException("Cannot write " + flowFile + "to Sequence File because its size is greater than the largest possible Integer");
    // Output name is the FlowFile's filename attribute with ".sf" appended; in this excerpt it is only used in the debug log below.
    final String sequenceFilename = flowFile.getAttribute(CoreAttributes.FILENAME.key()) + ".sf";
    // Analytics running on HDFS want data that is written with a BytesWritable. However, creating a
    // BytesWritable requires that we buffer the entire file into memory in a byte array.
    // We can create an FSFilterableOutputStream to wrap the FSDataOutputStream and use that to replace
    // the InputStreamWritable class name with the BytesWritable class name when we write the header.
    // This allows the Sequence File to say that the Values are of type BytesWritable (so they can be
    // read via the BytesWritable class) while allowing us to stream the data rather than buffering
    // entire files in memory.
    final byte[] toReplace, replaceWith;
    try {
        toReplace = InputStreamWritable.class.getCanonicalName().getBytes("UTF-8");
        replaceWith = BytesWritable.class.getCanonicalName().getBytes("UTF-8");
    } catch (final UnsupportedEncodingException e) {
        // This won't happen.
        // NOTE(review): the caught exception is not passed along as the cause.
        throw new RuntimeException("UTF-8 is not a supported Character Format");
    // Started immediately (constructor argument true) so the data rate logged below covers the write.
    final StopWatch watch = new StopWatch(true);
    FlowFile sfFlowFile = session.write(flowFile, new StreamCallback() {

        public void process(InputStream in, OutputStream out) throws IOException {
            // Use a FilterableOutputStream to change 'InputStreamWritable' to 'BytesWritable' - see comment
            // above for an explanation of why we want to do this.
            final ByteFilteringOutputStream bwos = new ByteFilteringOutputStream(out);
            // TODO: Adding this filter could be dangerous... A Sequence File's header contains 3 bytes: "SEQ",
            // followed by 1 byte that is the Sequence File version, followed by 2 "entries." These "entries"
            // contain the size of the Key/Value type and the Key/Value type. So, we will be writing the
            // value type as InputStreamWritable -- which we need to change to BytesWritable. This means that
            // we must also change the "size" that is written, but replacing this single byte could be
            // dangerous. However, we know exactly what will be written to the header, and we limit this at one
            // replacement, so we should be just fine.
            bwos.addFilter(toReplace, replaceWith, 1);
            // Second filter swaps the single length byte of the class name; also limited to one replacement.
            bwos.addFilter((byte) InputStreamWritable.class.getCanonicalName().length(), (byte) BytesWritable.class.getCanonicalName().length(), 1);
            try (final FSDataOutputStream fsDataOutputStream = new FSDataOutputStream(bwos, new Statistics(""));
                // NOTE(review): the doubled comma after 'configuration' suggests an argument was lost in extraction — confirm against upstream.
                final SequenceFile.Writer writer = SequenceFile.createWriter(configuration,, SequenceFile.Writer.keyClass(Text.class), SequenceFile.Writer.valueClass(InputStreamWritable.class), SequenceFile.Writer.compression(compressionType, compressionCodec))) {
                processInputStream(in, flowFile, writer);
            // NOTE(review): the body of this finally block is not visible in this excerpt.
            } finally {
    logger.debug("Wrote Sequence File {} ({}).", new Object[] { sequenceFilename, watch.calculateDataRate(flowFile.getSize()) });
    return sfFlowFile;
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) BufferedInputStream( InputStream( OutputStream( ByteFilteringOutputStream(org.apache.nifi.processors.hadoop.util.ByteFilteringOutputStream) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) UnsupportedEncodingException( IOException( Statistics(org.apache.hadoop.fs.FileSystem.Statistics) StreamCallback( StopWatch(org.apache.nifi.util.StopWatch) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) ByteFilteringOutputStream(org.apache.nifi.processors.hadoop.util.ByteFilteringOutputStream) Writer( SequenceFileWriter(org.apache.nifi.processors.hadoop.util.SequenceFileWriter)

Example 32 with StopWatch

use of org.apache.nifi.util.StopWatch in project nifi by apache.

the class FileSystemRepository method destroyExpiredArchives.

/**
 * Expires archived content for a single container. Phases visible in this excerpt:
 * <ol>
 * <li>Return -1 when no minimum-space requirement is registered for the container.</li>
 * <li>Drain the container's queue of archived files while space still has to be freed
 * or the head entry is older than the removal threshold.</li>
 * <li>Walk the "archive" directory of every section; files older than the threshold are
 * treated as expired, the others are collected only when usable space is below the requirement.</li>
 * <li>Sort the collected entries, process them while re-checking usable space every 25 files,
 * then iterate over at most 100000 of them for re-queueing.</li>
 * </ol>
 *
 * NOTE(review): this excerpt appears to have lost closing braces and several statements
 * (including the actual delete calls) during extraction; compare against the upstream
 * source before relying on it.
 *
 * @param containerName name of the container, used for lookups and log messages
 * @param container path of the container whose sections are scanned
 * @return -1 if the minimum required space is unknown; otherwise a last-modified timestamp
 *         in milliseconds ({@code oldestArchiveDate} from the queue phase, or
 *         {@code oldestContainerArchive} from the scan phase)
 * @throws IOException declared by the signature
 */
private long destroyExpiredArchives(final String containerName, final Path container) throws IOException {
    archiveExpirationLog.debug("Destroying Expired Archives for Container {}", containerName);
    final List<ArchiveInfo> notYetExceedingThreshold = new ArrayList<>();
    // Anything last modified before this instant is considered expired.
    long removalTimeThreshold = System.currentTimeMillis() - maxArchiveMillis;
    long oldestArchiveDateFound = System.currentTimeMillis();
    // determine how much space we must have in order to stop deleting old data
    final Long minRequiredSpace = minUsableContainerBytesForArchive.get(containerName);
    if (minRequiredSpace == null) {
        archiveExpirationLog.debug("Could not determine minimum required space so will not destroy any archived data");
        return -1L;
    final long usableSpace = getContainerUsableSpace(containerName);
    final ContainerState containerState = containerStateMap.get(containerName);
    // First, delete files from our queue
    final long startNanos = System.nanoTime();
    // Negative when more space is already usable than required.
    final long toFree = minRequiredSpace - usableSpace;
    final BlockingQueue<ArchiveInfo> fileQueue = archivedFiles.get(containerName);
    if (archiveExpirationLog.isDebugEnabled()) {
        if (toFree < 0) {
            archiveExpirationLog.debug("Currently {} bytes free for Container {}; requirement is {} byte free, so no need to free space until an additional {} bytes are used", usableSpace, containerName, minRequiredSpace, Math.abs(toFree));
        } else {
            archiveExpirationLog.debug("Currently {} bytes free for Container {}; requirement is {} byte free, so need to free {} bytes", usableSpace, containerName, minRequiredSpace, toFree);
    ArchiveInfo toDelete;
    int deleteCount = 0;
    long freed = 0L;
    // peek() leaves the head in place; it is only removed (poll) once the check below decides to delete it.
    while ((toDelete = fileQueue.peek()) != null) {
        try {
            final long fileSize = toDelete.getSize();
            // Threshold is recomputed on every iteration from the current time.
            removalTimeThreshold = System.currentTimeMillis() - maxArchiveMillis;
            // NOTE(review): "If so" refers to a condition described in a comment that is not visible here; the condition is the check on the next line.
            // If so, then we call poll() to remove it
            if (freed < toFree || getLastModTime(toDelete.toPath()) < removalTimeThreshold) {
                // remove the head of the queue, which is already stored in 'toDelete'
                toDelete = fileQueue.poll();
                // NOTE(review): no delete call or deleteCount increment is visible between poll() and this log line — presumably lost in extraction.
                LOG.debug("Deleted archived ContentClaim with ID {} from Container {} because the archival size was exceeding the max configured size", toDelete.getName(), containerName);
                freed += fileSize;
            // If we'd freed up enough space, we're done... unless the next file needs to be destroyed based on time.
            if (freed >= toFree) {
                // If the last mod time indicates that it should be removed, just continue loop.
                if (deleteBasedOnTimestamp(fileQueue, removalTimeThreshold)) {
                    archiveExpirationLog.debug("Freed enough space ({} bytes freed, needed to free {} bytes) but will continue to expire data based on timestamp", freed, toFree);
                archiveExpirationLog.debug("Freed enough space ({} bytes freed, needed to free {} bytes). Finished expiring data", freed, toFree);
                final ArchiveInfo archiveInfo = fileQueue.peek();
                // With an empty queue the current time is reported as the oldest archive date.
                final long oldestArchiveDate = archiveInfo == null ? System.currentTimeMillis() : getLastModTime(archiveInfo.toPath());
                // Otherwise, we're done. Return the last mod time of the oldest file in the container's archive.
                final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
                if (deleteCount > 0) {
          // NOTE(review): the receiver and method name of this logging call are missing; only the argument list remains.
          "Deleted {} files from archive for Container {}; oldest Archive Date is now {}; container cleanup took {} millis", deleteCount, containerName, new Date(oldestArchiveDate), millis);
                } else {
                    LOG.debug("Deleted {} files from archive for Container {}; oldest Archive Date is now {}; container cleanup took {} millis", deleteCount, containerName, new Date(oldestArchiveDate), millis);
                return oldestArchiveDate;
        } catch (final IOException ioe) {
            LOG.warn("Failed to delete {} from archive due to {}", toDelete, ioe.toString());
            // The stack trace is logged (at WARN level) only when debug logging is enabled.
            if (LOG.isDebugEnabled()) {
                LOG.warn("", ioe);
    // Go through each container and grab the archived data into a List
    archiveExpirationLog.debug("Searching for more archived data to expire");
    // Started immediately; the phase timings below are derived from successive getElapsed readings.
    final StopWatch stopWatch = new StopWatch(true);
    for (int i = 0; i < SECTIONS_PER_CONTAINER; i++) {
        final Path sectionContainer = container.resolve(String.valueOf(i));
        final Path archive = sectionContainer.resolve("archive");
        // NOTE(review): the body of this existence check is not visible (presumably it skips the section — confirm).
        if (!Files.exists(archive)) {
        try {
            // Final copy so the anonymous visitor below can capture the threshold.
            final long timestampThreshold = removalTimeThreshold;
            Files.walkFileTree(archive, new SimpleFileVisitor<Path>() {

                public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs) throws IOException {
                    if (attrs.isDirectory()) {
                        return FileVisitResult.CONTINUE;
                    final long lastModTime = getLastModTime(file);
                    if (lastModTime < timestampThreshold) {
                        try {
                            // NOTE(review): no delete call is visible before this log line — presumably lost in extraction.
                            LOG.debug("Deleted archived ContentClaim with ID {} from Container {} because it was older than the configured max archival duration", file.toFile().getName(), containerName);
                        } catch (final IOException ioe) {
                            LOG.warn("Failed to remove archived ContentClaim with ID {} from Container {} due to {}", file.toFile().getName(), containerName, ioe.toString());
                            if (LOG.isDebugEnabled()) {
                                LOG.warn("", ioe);
                    // Not yet expired: only collected as a deletion candidate when usable space is below the requirement.
                    } else if (usableSpace < minRequiredSpace) {
                        notYetExceedingThreshold.add(new ArchiveInfo(container, file, attrs.size(), lastModTime));
                    return FileVisitResult.CONTINUE;
        } catch (final IOException ioe) {
            LOG.warn("Failed to cleanup archived files in {} due to {}", archive, ioe.toString());
            if (LOG.isDebugEnabled()) {
                LOG.warn("", ioe);
    final long deleteExpiredMillis = stopWatch.getElapsed(TimeUnit.MILLISECONDS);
    // Sort the list according to last modified time
    Collections.sort(notYetExceedingThreshold, new Comparator<ArchiveInfo>() {

        public int compare(final ArchiveInfo o1, final ArchiveInfo o2) {
            // NOTE(review): the comparison expression is truncated; only its second argument (o2.getLastModTime()) remains.
            return, o2.getLastModTime());
    final long sortRemainingMillis = stopWatch.getElapsed(TimeUnit.MILLISECONDS) - deleteExpiredMillis;
    // Delete the oldest data
    archiveExpirationLog.debug("Deleting data based on timestamp");
    final Iterator<ArchiveInfo> itr = notYetExceedingThreshold.iterator();
    int counter = 0;
    while (itr.hasNext()) {
        // NOTE(review): the right-hand side is missing (presumably itr.next() — confirm against upstream).
        final ArchiveInfo archiveInfo =;
        try {
            final Path path = archiveInfo.toPath();
            LOG.debug("Deleted archived ContentClaim with ID {} from Container {} because the archival size was exceeding the max configured size", archiveInfo.getName(), containerName);
            // Check if we've freed enough space every 25 files that we destroy
            if (++counter % 25 == 0) {
                if (getContainerUsableSpace(containerName) > minRequiredSpace) {
                    // check if we can stop now
                    // NOTE(review): no break/return is visible after this log line in the excerpt.
                    LOG.debug("Finished cleaning up archive for Container {}", containerName);
        } catch (final IOException ioe) {
            LOG.warn("Failed to delete {} from archive due to {}", archiveInfo, ioe.toString());
            if (LOG.isDebugEnabled()) {
                LOG.warn("", ioe);
    final long deleteOldestMillis = stopWatch.getElapsed(TimeUnit.MILLISECONDS) - sortRemainingMillis - deleteExpiredMillis;
    long oldestContainerArchive;
    if (notYetExceedingThreshold.isEmpty()) {
        oldestContainerArchive = System.currentTimeMillis();
    } else {
        // First element of the sorted list.
        oldestContainerArchive = notYetExceedingThreshold.get(0).getLastModTime();
    if (oldestContainerArchive < oldestArchiveDateFound) {
        oldestArchiveDateFound = oldestContainerArchive;
    // Queue up the files in the order that they should be destroyed so that we don't have to scan the directories for a while.
    // NOTE(review): the loop body is not visible in this excerpt; at most 100000 entries are iterated.
    for (final ArchiveInfo toEnqueue : notYetExceedingThreshold.subList(0, Math.min(100000, notYetExceedingThreshold.size()))) {
    final long cleanupMillis = stopWatch.getElapsed(TimeUnit.MILLISECONDS) - deleteOldestMillis - sortRemainingMillis - deleteExpiredMillis;
    LOG.debug("Oldest Archive Date for Container {} is {}; delete expired = {} ms, sort remaining = {} ms, delete oldest = {} ms, cleanup = {} ms", containerName, new Date(oldestContainerArchive), deleteExpiredMillis, sortRemainingMillis, deleteOldestMillis, cleanupMillis);
    return oldestContainerArchive;
Also used : Path(java.nio.file.Path) ArrayList(java.util.ArrayList) FileVisitResult(java.nio.file.FileVisitResult) IOException(java.io.IOException) Date(java.util.Date) StopWatch(org.apache.nifi.util.StopWatch) AtomicLong(java.util.concurrent.atomic.AtomicLong) BasicFileAttributes(java.nio.file.attribute.BasicFileAttributes)

Example 33 with StopWatch

use of org.apache.nifi.util.StopWatch in project nifi by apache.

the class SocketProtocolListener method dispatchRequest.

/**
 * Handles one protocol request arriving on {@code socket}: unmarshals the message from the
 * socket's input stream, picks a handler from {@code getHandlers()} whose {@code canHandle}
 * accepts it, and, when the handler returns a non-null response, marshals that response to
 * the socket's output stream. IOException and ProtocolException are caught and logged as
 * warnings; a bulletin is created when a bulletin repository is available.
 *
 * NOTE(review): this excerpt appears to have lost closing braces and some statements
 * during extraction; compare against the upstream source before relying on it.
 *
 * @param socket the connection the request is read from and the response is written to
 */
public void dispatchRequest(final Socket socket) {
    // Declared outside the try so the catch block can include it in its messages (may still be null there).
    String hostname = null;
    try {
        // Started immediately; its duration is passed to the "Finished processing request" message below.
        final StopWatch stopWatch = new StopWatch(true);
        hostname = socket.getInetAddress().getHostName();
        final String requestId = UUID.randomUUID().toString();
        logger.debug("Received request {} from {}", requestId, hostname);
        String requestorDn = getRequestorDN(socket);
        // unmarshall message
        final ProtocolMessageUnmarshaller<ProtocolMessage> unmarshaller = protocolContext.createUnmarshaller();
        // Counts the bytes read so the message length can be reported at the end.
        final ByteCountingInputStream countingIn = new ByteCountingInputStream(socket.getInputStream());
        InputStream wrappedInStream = countingIn;
        if (logger.isDebugEnabled()) {
            // don't buffer more than 1 MB of the message
            final int maxMsgBuffer = 1024 * 1024;
            final CopyingInputStream copyingInputStream = new CopyingInputStream(wrappedInStream, maxMsgBuffer);
            wrappedInStream = copyingInputStream;
        final ProtocolMessage request;
        try {
            request = unmarshaller.unmarshal(wrappedInStream);
        } finally {
            // Runs whether or not unmarshalling succeeded, so the received bytes are logged in both cases.
            if (logger.isDebugEnabled() && wrappedInStream instanceof CopyingInputStream) {
                final CopyingInputStream copyingInputStream = (CopyingInputStream) wrappedInStream;
                byte[] receivedMessage = copyingInputStream.getBytesRead();
                // NOTE(review): new String(byte[]) is called without a charset argument.
                logger.debug("Received message: " + new String(receivedMessage));
        // dispatch message to handler
        ProtocolHandler desiredHandler = null;
        final Collection<ProtocolHandler> handlers = getHandlers();
        for (final ProtocolHandler handler : handlers) {
            if (handler.canHandle(request)) {
                // NOTE(review): no break is visible after this assignment in the excerpt — confirm against upstream.
                desiredHandler = handler;
        // if no handler found, throw exception; otherwise handle request
        if (desiredHandler == null) {
            logger.error("Received request of type {} but none of the following Protocol Handlers were able to process the request: {}", request.getType(), handlers);
            throw new ProtocolException("No handler assigned to handle message type: " + request.getType());
        } else {
            final ProtocolMessage response = desiredHandler.handle(request);
            // A null response means nothing is written back to the socket.
            if (response != null) {
                try {
                    logger.debug("Sending response for request {}", requestId);
                    // marshal message to output stream
                    final ProtocolMessageMarshaller<ProtocolMessage> marshaller = protocolContext.createMarshaller();
                    marshaller.marshal(response, socket.getOutputStream());
                } catch (final IOException ioe) {
                    throw new ProtocolException("Failed marshalling protocol message in response to message type: " + request.getType() + " due to " + ioe, ioe);
        final NodeIdentifier nodeId = getNodeIdentifier(request);
        // NOTE(review): the next line fuses two statements; the receiver/method of the logging call after 'from' is missing.
        final String from = nodeId == null ? hostname : nodeId.toString();"Finished processing request {} (type={}, length={} bytes) from {} in {}", requestId, request.getType(), countingIn.getBytesRead(), from, stopWatch.getDuration());
    } catch (final IOException | ProtocolException e) {
        logger.warn("Failed processing protocol message from " + hostname + " due to " + e, e);
        if (bulletinRepository != null) {
            // NOTE(review): what is done with the bulletin is not visible; the excerpt ends after its creation.
            final Bulletin bulletin = BulletinFactory.createBulletin("Clustering", "WARNING", String.format("Failed to process protocol message from %s due to: %s", hostname, e.toString()));
Also used : ProtocolException(org.apache.nifi.cluster.protocol.ProtocolException) ByteCountingInputStream(org.apache.nifi.stream.io.ByteCountingInputStream) InputStream(java.io.InputStream) ByteCountingInputStream(org.apache.nifi.stream.io.ByteCountingInputStream) IOException(java.io.IOException) ProtocolMessage(org.apache.nifi.cluster.protocol.message.ProtocolMessage) StopWatch(org.apache.nifi.util.StopWatch) ProtocolHandler(org.apache.nifi.cluster.protocol.ProtocolHandler) Bulletin(org.apache.nifi.reporting.Bulletin) NodeIdentifier(org.apache.nifi.cluster.protocol.NodeIdentifier)

Example 34 with StopWatch

use of org.apache.nifi.util.StopWatch in project nifi by apache.

the class AbstractFetchHDFSRecord method onTrigger.

/**
 * Takes one FlowFile from the session and, inside {@code ugi.doAs}, evaluates the FILENAME
 * property against it, creates a child FlowFile, and writes to the child using an
 * HDFSRecordReader and a RecordSetWriter. On success the post-processed child receives the
 * write-result attributes, the record count and the MIME type, a provenance fetch event is
 * reported, and it is transferred to REL_SUCCESS.
 *
 * Failure routing visible in this excerpt:
 * FileNotFoundException / AccessControlException: original FlowFile to REL_FAILURE with FETCH_FAILURE_REASON_ATTR set;
 * IOException / FlowFileAccessException: original FlowFile penalized and sent to REL_RETRY;
 * any other Throwable: original FlowFile to REL_FAILURE with FETCH_FAILURE_REASON_ATTR set.
 *
 * NOTE(review): this excerpt appears to have lost closing braces and several statements
 * during extraction; compare against the upstream source before relying on it.
 *
 * @param context supplies the FILENAME and RECORD_WRITER properties
 * @param session used to get, create, write and transfer FlowFiles
 * @throws ProcessException declared by the signature
 */
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // do this before getting a flow file so that we always get a chance to attempt Kerberos relogin
    final FileSystem fileSystem = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();
    if (configuration == null || fileSystem == null || ugi == null) {
        // NOTE(review): no return/yield is visible after this error log in the excerpt.
        getLogger().error("Processor not configured properly because Configuration, FileSystem, or UserGroupInformation was null");
    final FlowFile originalFlowFile = session.get();
    // NOTE(review): the body of this null check is not visible in the excerpt.
    if (originalFlowFile == null) {
    ugi.doAs((PrivilegedAction<Object>) () -> {
        // Stays null until session.create succeeds; checked at the end for cleanup.
        FlowFile child = null;
        final String filenameValue = context.getProperty(FILENAME).evaluateAttributeExpressions(originalFlowFile).getValue();
        try {
            final Path path = new Path(filenameValue);
            // Holder that is re-thrown after the write callback when it is non-null.
            final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
            final AtomicReference<WriteResult> writeResult = new AtomicReference<>();
            final RecordSetWriterFactory recordSetWriterFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
            // Started immediately; its duration is used in the log message and provenance event below.
            final StopWatch stopWatch = new StopWatch(true);
            // use a child FlowFile so that if any error occurs we can route the original untouched FlowFile to retry/failure
            child = session.create(originalFlowFile);
            final AtomicReference<String> mimeTypeRef = new AtomicReference<>();
            child = session.write(child, (final OutputStream rawOut) -> {
                try (final BufferedOutputStream out = new BufferedOutputStream(rawOut);
                    final HDFSRecordReader recordReader = createHDFSRecordReader(context, originalFlowFile, configuration, path)) {
                    // The first record is read up front; its schema (or null when there is no record) is passed to getSchema.
                    Record record = recordReader.nextRecord();
                    final RecordSchema schema = recordSetWriterFactory.getSchema(originalFlowFile.getAttributes(), record == null ? null : record.getSchema());
                    try (final RecordSetWriter recordSetWriter = recordSetWriterFactory.createWriter(getLogger(), schema, out)) {
                        // NOTE(review): the bodies of the next two statements are not visible in the excerpt.
                        if (record != null) {
                        while ((record = recordReader.nextRecord()) != null) {
                // NOTE(review): the body of this catch block is not visible in the excerpt.
                } catch (Exception e) {
            // NOTE(review): the comment below is the tail of a longer comment whose start is not visible.
            // into one of the appropriate catch blocks below
            if (exceptionHolder.get() != null) {
                throw exceptionHolder.get();
            FlowFile successFlowFile = postProcess(context, session, child, path);
            // Start from the writer's attributes, then add record count and MIME type.
            final Map<String, String> attributes = new HashMap<>(writeResult.get().getAttributes());
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.get().getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), mimeTypeRef.get());
            successFlowFile = session.putAllAttributes(successFlowFile, attributes);
            final Path qualifiedPath = path.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
            getLogger().info("Successfully received content from {} for {} in {} milliseconds", new Object[] { qualifiedPath, successFlowFile, stopWatch.getDuration() });
            session.getProvenanceReporter().fetch(successFlowFile, qualifiedPath.toString(), stopWatch.getDuration(TimeUnit.MILLISECONDS));
            session.transfer(successFlowFile, REL_SUCCESS);
            return null;
        } catch (final FileNotFoundException | AccessControlException e) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { filenameValue, originalFlowFile, e });
            // Falls back to e.toString() when the exception has no message.
            final FlowFile failureFlowFile = session.putAttribute(originalFlowFile, FETCH_FAILURE_REASON_ATTR, e.getMessage() == null ? e.toString() : e.getMessage());
            session.transfer(failureFlowFile, REL_FAILURE);
        } catch (final IOException | FlowFileAccessException e) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to retry", new Object[] { filenameValue, originalFlowFile, e });
            session.transfer(session.penalize(originalFlowFile), REL_RETRY);
        } catch (final Throwable t) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { filenameValue, originalFlowFile, t });
            final FlowFile failureFlowFile = session.putAttribute(originalFlowFile, FETCH_FAILURE_REASON_ATTR, t.getMessage() == null ? t.toString() : t.getMessage());
            session.transfer(failureFlowFile, REL_FAILURE);
        // if we got this far then we weren't successful so we need to clean up the child flow file if it got initialized
        // NOTE(review): the cleanup statement inside this check is not visible in the excerpt.
        if (child != null) {
        return null;
Also used : Path(org.apache.hadoop.fs.Path) FlowFile(org.apache.nifi.flowfile.FlowFile) Configuration(org.apache.hadoop.conf.Configuration) BufferedOutputStream( OutputStream( AtomicReference(java.util.concurrent.atomic.AtomicReference) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) ProcessException(org.apache.nifi.processor.exception.ProcessException) FlowFileAccessException(org.apache.nifi.processor.exception.FlowFileAccessException) IOException( FileNotFoundException( AccessControlException( StopWatch(org.apache.nifi.util.StopWatch) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) FileSystem(org.apache.hadoop.fs.FileSystem) Record(org.apache.nifi.serialization.record.Record) BufferedOutputStream( RecordSchema(org.apache.nifi.serialization.record.RecordSchema) HashMap(java.util.HashMap) Map(java.util.Map) UserGroupInformation( HDFSRecordReader(org.apache.nifi.processors.hadoop.record.HDFSRecordReader)

Example 35 with StopWatch

use of org.apache.nifi.util.StopWatch in project nifi by apache.

the class AbstractPutHDFSRecord method onTrigger.

/**
 * Takes one FlowFile from the session and, inside {@code ugi.doAs}, writes its records to a
 * temporary dot-prefixed file in the evaluated DIRECTORY, then renames that file to the
 * FlowFile's filename, changes its owner, updates the FlowFile's attributes, reports a
 * provenance send event and transfers the FlowFile to REL_SUCCESS.
 *
 * Failure routing visible in this excerpt:
 * destination or temp file already exists and OVERWRITE is false: penalized, REL_FAILURE;
 * IOException / FlowFileAccessException: temp file deleted quietly, penalized, REL_RETRY;
 * any other Throwable: temp file deleted quietly, REL_FAILURE.
 *
 * NOTE(review): this excerpt appears to have lost closing braces and several statements
 * during extraction; compare against the upstream source before relying on it.
 *
 * @param context supplies the DIRECTORY, OVERWRITE and RECORD_READER properties
 * @param session used to get, read and transfer FlowFiles
 * @throws ProcessException declared by the signature
 */
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // do this before getting a flow file so that we always get a chance to attempt Kerberos relogin
    final FileSystem fileSystem = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();
    if (configuration == null || fileSystem == null || ugi == null) {
        // NOTE(review): no return/yield is visible after this error log in the excerpt.
        getLogger().error("Processor not configured properly because Configuration, FileSystem, or UserGroupInformation was null");
    final FlowFile flowFile = session.get();
    // NOTE(review): the body of this null check is not visible in the excerpt.
    if (flowFile == null) {
    ugi.doAs((PrivilegedAction<Object>) () -> {
        // Only set once the write phase has run, so the catch blocks know which temp file to delete.
        Path tempDotCopyFile = null;
        FlowFile putFlowFile = flowFile;
        try {
            // TODO codec extension
            final String filenameValue = putFlowFile.getAttribute(CoreAttributes.FILENAME.key());
            final String directoryValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile).getValue();
            // create the directory if it doesn't exist
            final Path directoryPath = new Path(directoryValue);
            // NOTE(review): remoteOwner and remoteGroup are not declared in the visible excerpt.
            createDirectory(fileSystem, directoryPath, remoteOwner, remoteGroup);
            // write to tempFile first and on success rename to destFile
            final Path tempFile = new Path(directoryPath, "." + filenameValue);
            final Path destFile = new Path(directoryPath, filenameValue);
            final boolean destinationExists = fileSystem.exists(destFile) || fileSystem.exists(tempFile);
            final boolean shouldOverwrite = context.getProperty(OVERWRITE).asBoolean();
            // if the tempFile or destFile already exist, and overwrite is set to false, then transfer to failure
            if (destinationExists && !shouldOverwrite) {
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                getLogger().warn("penalizing {} and routing to failure because file with same name already exists", new Object[] { putFlowFile });
                return null;
            // Holder that is re-thrown after the read callback when it is non-null.
            final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
            final AtomicReference<WriteResult> writeResult = new AtomicReference<>();
            final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
            // Final alias because putFlowFile is reassigned later and cannot be captured by the lambda.
            final FlowFile flowFileIn = putFlowFile;
            // Started immediately; used for the data rate and duration computed after the callback.
            final StopWatch stopWatch = new StopWatch(true);
            // Read records from the incoming FlowFile and write them the tempFile
  // NOTE(review): the receiver, method name and first argument of this call are missing from the excerpt.
  , (final InputStream rawIn) -> {
                RecordReader recordReader = null;
                HDFSRecordWriter recordWriter = null;
                try (final BufferedInputStream in = new BufferedInputStream(rawIn)) {
                    // handle this separately from the other IOExceptions which normally route to retry
                    try {
                        recordReader = recordReaderFactory.createRecordReader(flowFileIn, in, getLogger());
                    } catch (Exception e) {
                        // NOTE(review): what is done with rrfe after construction is not visible in the excerpt.
                        final RecordReaderFactoryException rrfe = new RecordReaderFactoryException("Unable to create RecordReader", e);
                    final RecordSet recordSet = recordReader.createRecordSet();
                    recordWriter = createHDFSRecordWriter(context, flowFile, configuration, tempFile, recordReader.getSchema());
                // NOTE(review): the bodies of the catch and finally blocks below are not visible in the excerpt.
                } catch (Exception e) {
                } finally {
            final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
            final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
            tempDotCopyFile = tempFile;
            // NOTE(review): the comment below is the tail of a longer comment whose start is not visible.
            // into one of the appropriate catch blocks below
            if (exceptionHolder.get() != null) {
                throw exceptionHolder.get();
            // Attempt to rename from the tempFile to destFile, and change owner if successfully renamed
            rename(fileSystem, tempFile, destFile);
            changeOwner(fileSystem, destFile, remoteOwner, remoteGroup);
            getLogger().info("Wrote {} to {} in {} milliseconds at a rate of {}", new Object[] { putFlowFile, destFile, millis, dataRate });
            putFlowFile = postProcess(context, session, putFlowFile, destFile);
            final String newFilename = destFile.getName();
            final String hdfsPath = destFile.getParent().toString();
            // Update the filename and absolute path attributes
            final Map<String, String> attributes = new HashMap<>(writeResult.get().getAttributes());
            attributes.put(CoreAttributes.FILENAME.key(), newFilename);
            attributes.put(ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.get().getRecordCount()));
            putFlowFile = session.putAllAttributes(putFlowFile, attributes);
            // Send a provenance event and transfer to success
            final Path qualifiedPath = destFile.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
            session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());
            session.transfer(putFlowFile, REL_SUCCESS);
        } catch (IOException | FlowFileAccessException e) {
            // tempDotCopyFile is still null here if the failure happened before the write phase completed.
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { e });
            session.transfer(session.penalize(putFlowFile), REL_RETRY);
        } catch (Throwable t) {
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { t });
            session.transfer(putFlowFile, REL_FAILURE);
        return null;
Also used : Path(org.apache.hadoop.fs.Path) FlowFile(org.apache.nifi.flowfile.FlowFile) Configuration(org.apache.hadoop.conf.Configuration) BufferedInputStream( InputStream( RecordReader(org.apache.nifi.serialization.RecordReader) HDFSRecordWriter(org.apache.nifi.processors.hadoop.record.HDFSRecordWriter) AtomicReference(java.util.concurrent.atomic.AtomicReference) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) ProcessException(org.apache.nifi.processor.exception.ProcessException) RecordReaderFactoryException(org.apache.nifi.processors.hadoop.exception.RecordReaderFactoryException) FlowFileAccessException(org.apache.nifi.processor.exception.FlowFileAccessException) FailureException(org.apache.nifi.processors.hadoop.exception.FailureException) IOException( FileNotFoundException( RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) StopWatch(org.apache.nifi.util.StopWatch) BufferedInputStream( FileSystem(org.apache.hadoop.fs.FileSystem) RecordReaderFactoryException(org.apache.nifi.processors.hadoop.exception.RecordReaderFactoryException) RecordSet(org.apache.nifi.serialization.record.RecordSet) HashMap(java.util.HashMap) Map(java.util.Map) UserGroupInformation(


StopWatch (org.apache.nifi.util.StopWatch)72 FlowFile (org.apache.nifi.flowfile.FlowFile)59 IOException ( ProcessException (org.apache.nifi.processor.exception.ProcessException)37 InputStream ( ComponentLog (org.apache.nifi.logging.ComponentLog)27 OutputStream ( HashMap (java.util.HashMap)16 ArrayList (java.util.ArrayList)13 Map (java.util.Map)11 ProcessSession (org.apache.nifi.processor.ProcessSession)11 AtomicLong (java.util.concurrent.atomic.AtomicLong)10 InputStreamCallback ( StreamCallback ( HashSet (java.util.HashSet)9 Path (org.apache.hadoop.fs.Path)9 Charset (java.nio.charset.Charset)8 AtomicReference (java.util.concurrent.atomic.AtomicReference)8 FileSystem (org.apache.hadoop.fs.FileSystem)8 PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor)8