Example 11 with FlowFileAccessException

Use of org.apache.nifi.processor.exception.FlowFileAccessException in project nifi by apache.

The class FlowFileAccessInputStream, method skip:

@Override
public long skip(final long n) throws IOException {
    try {
        final long count = super.skip(n);
        bytesConsumed += count;
        return count;
    } catch (final IOException ioe) {
        throw new FlowFileAccessException("Could not skip data in " + flowFile, ioe);
    }
}
Also used: FlowFileAccessException(org.apache.nifi.processor.exception.FlowFileAccessException), IOException(java.io.IOException)
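
What makes this small override worth studying is the translation boundary: checked IOExceptions from the content repository become an unchecked FlowFileAccessException at the edge of processor code, while the stream keeps an accurate count of consumed bytes. Below is a minimal self-contained sketch of the same pattern; CountingAccessInputStream and RepositoryAccessException are hypothetical names standing in for the NiFi classes.

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;

// Hypothetical stand-in for FlowFileAccessException: unchecked, carries a cause.
class RepositoryAccessException extends RuntimeException {
    RepositoryAccessException(final String message, final Throwable cause) {
        super(message, cause);
    }
}

// Counts bytes consumed and converts repository I/O failures to unchecked exceptions.
class CountingAccessInputStream extends FilterInputStream {

    private long bytesConsumed;

    CountingAccessInputStream(final InputStream in) {
        super(in);
    }

    @Override
    public long skip(final long n) throws IOException {
        try {
            // delegate, then record how many bytes were actually skipped
            final long count = super.skip(n);
            bytesConsumed += count;
            return count;
        } catch (final IOException ioe) {
            // translate the checked repository failure at the processor boundary
            throw new RepositoryAccessException("Could not skip data", ioe);
        }
    }

    public long getBytesConsumed() {
        return bytesConsumed;
    }
}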

Example 12 with FlowFileAccessException

Use of org.apache.nifi.processor.exception.FlowFileAccessException in project nifi by apache.

The class AbstractFetchHDFSRecord, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // do this before getting a flow file so that we always get a chance to attempt Kerberos relogin
    final FileSystem fileSystem = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();
    if (configuration == null || fileSystem == null || ugi == null) {
        getLogger().error("Processor not configured properly because Configuration, FileSystem, or UserGroupInformation was null");
        context.yield();
        return;
    }
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        context.yield();
        return;
    }
    ugi.doAs((PrivilegedAction<Object>) () -> {
        FlowFile child = null;
        final String filenameValue = context.getProperty(FILENAME).evaluateAttributeExpressions(originalFlowFile).getValue();
        try {
            final Path path = new Path(filenameValue);
            final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
            final AtomicReference<WriteResult> writeResult = new AtomicReference<>();
            final RecordSetWriterFactory recordSetWriterFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
            final StopWatch stopWatch = new StopWatch(true);
            // use a child FlowFile so that if any error occurs we can route the original untouched FlowFile to retry/failure
            child = session.create(originalFlowFile);
            final AtomicReference<String> mimeTypeRef = new AtomicReference<>();
            child = session.write(child, (final OutputStream rawOut) -> {
                try (final BufferedOutputStream out = new BufferedOutputStream(rawOut);
                    final HDFSRecordReader recordReader = createHDFSRecordReader(context, originalFlowFile, configuration, path)) {
                    Record record = recordReader.nextRecord();
                    final RecordSchema schema = recordSetWriterFactory.getSchema(originalFlowFile.getAttributes(), record == null ? null : record.getSchema());
                    try (final RecordSetWriter recordSetWriter = recordSetWriterFactory.createWriter(getLogger(), schema, out)) {
                        recordSetWriter.beginRecordSet();
                        if (record != null) {
                            recordSetWriter.write(record);
                        }
                        while ((record = recordReader.nextRecord()) != null) {
                            recordSetWriter.write(record);
                        }
                        writeResult.set(recordSetWriter.finishRecordSet());
                        mimeTypeRef.set(recordSetWriter.getMimeType());
                    }
                } catch (Exception e) {
                    exceptionHolder.set(e);
                }
            });
            stopWatch.stop();
            // if any error was captured inside the session.write callback, re-throw it here so we jump
            // into one of the appropriate catch blocks below
            if (exceptionHolder.get() != null) {
                throw exceptionHolder.get();
            }
            FlowFile successFlowFile = postProcess(context, session, child, path);
            final Map<String, String> attributes = new HashMap<>(writeResult.get().getAttributes());
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.get().getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), mimeTypeRef.get());
            successFlowFile = session.putAllAttributes(successFlowFile, attributes);
            final Path qualifiedPath = path.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
            getLogger().info("Successfully received content from {} for {} in {} milliseconds", new Object[] { qualifiedPath, successFlowFile, stopWatch.getDuration() });
            session.getProvenanceReporter().fetch(successFlowFile, qualifiedPath.toString(), stopWatch.getDuration(TimeUnit.MILLISECONDS));
            session.transfer(successFlowFile, REL_SUCCESS);
            session.remove(originalFlowFile);
            return null;
        } catch (final FileNotFoundException | AccessControlException e) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { filenameValue, originalFlowFile, e });
            final FlowFile failureFlowFile = session.putAttribute(originalFlowFile, FETCH_FAILURE_REASON_ATTR, e.getMessage() == null ? e.toString() : e.getMessage());
            session.transfer(failureFlowFile, REL_FAILURE);
        } catch (final IOException | FlowFileAccessException e) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to retry", new Object[] { filenameValue, originalFlowFile, e });
            session.transfer(session.penalize(originalFlowFile), REL_RETRY);
            context.yield();
        } catch (final Throwable t) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[] { filenameValue, originalFlowFile, t });
            final FlowFile failureFlowFile = session.putAttribute(originalFlowFile, FETCH_FAILURE_REASON_ATTR, t.getMessage() == null ? t.toString() : t.getMessage());
            session.transfer(failureFlowFile, REL_FAILURE);
        }
        // if we got this far then we weren't successful, so we need to clean up the child FlowFile if it was initialized
        if (child != null) {
            session.remove(child);
        }
        return null;
    });
}
Also used: Path(org.apache.hadoop.fs.Path), FlowFile(org.apache.nifi.flowfile.FlowFile), Configuration(org.apache.hadoop.conf.Configuration), BufferedOutputStream(java.io.BufferedOutputStream), OutputStream(java.io.OutputStream), AtomicReference(java.util.concurrent.atomic.AtomicReference), RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter), ProcessException(org.apache.nifi.processor.exception.ProcessException), FlowFileAccessException(org.apache.nifi.processor.exception.FlowFileAccessException), IOException(java.io.IOException), FileNotFoundException(java.io.FileNotFoundException), AccessControlException(org.apache.hadoop.security.AccessControlException), StopWatch(org.apache.nifi.util.StopWatch), RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory), FileSystem(org.apache.hadoop.fs.FileSystem), Record(org.apache.nifi.serialization.record.Record), RecordSchema(org.apache.nifi.serialization.record.RecordSchema), HashMap(java.util.HashMap), Map(java.util.Map), UserGroupInformation(org.apache.hadoop.security.UserGroupInformation), HDFSRecordReader(org.apache.nifi.processors.hadoop.record.HDFSRecordReader)
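
Two ideas in this example carry the error handling. First, the work happens on a child FlowFile so that any failure can route the untouched original to retry or failure. Second, the AtomicReference exception holder carries the concrete exception type out of the session.write callback, where the framework would otherwise wrap it, so the outer catch blocks can still tell FileNotFoundException apart from a retryable IOException. A standalone sketch of the holder pattern, assuming a hypothetical framework-style write() that wraps callback exceptions the way a ProcessSession wraps them in ProcessException:

import java.io.FileNotFoundException;
import java.util.concurrent.atomic.AtomicReference;

public class ExceptionHolderSketch {

    // Hypothetical framework hook: runs a callback and wraps anything it throws.
    interface Framework {
        void write(Runnable callback);
    }

    public static void main(final String[] args) {
        final Framework framework = callback -> {
            try {
                callback.run();
            } catch (final RuntimeException e) {
                throw new RuntimeException("wrapped by framework", e);
            }
        };

        final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
        framework.write(() -> {
            try {
                throw new FileNotFoundException("no such file");
            } catch (final Exception e) {
                exceptionHolder.set(e); // capture instead of letting the framework wrap it
            }
        });

        try {
            // re-throw outside the callback, mirroring the onTrigger above
            if (exceptionHolder.get() != null) {
                throw exceptionHolder.get();
            }
        } catch (final FileNotFoundException fnfe) {
            System.out.println("routing to failure: " + fnfe.getMessage());
        } catch (final Throwable t) {
            System.out.println("routing to retry: " + t);
        }
    }
}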

Example 13 with FlowFileAccessException

Use of org.apache.nifi.processor.exception.FlowFileAccessException in project nifi by apache.

The class AbstractPutHDFSRecord, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    // do this before getting a flow file so that we always get a chance to attempt Kerberos relogin
    final FileSystem fileSystem = getFileSystem();
    final Configuration configuration = getConfiguration();
    final UserGroupInformation ugi = getUserGroupInformation();
    if (configuration == null || fileSystem == null || ugi == null) {
        getLogger().error("Processor not configured properly because Configuration, FileSystem, or UserGroupInformation was null");
        context.yield();
        return;
    }
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        context.yield();
        return;
    }
    ugi.doAs((PrivilegedAction<Object>) () -> {
        Path tempDotCopyFile = null;
        FlowFile putFlowFile = flowFile;
        try {
            // TODO codec extension
            final String filenameValue = putFlowFile.getAttribute(CoreAttributes.FILENAME.key());
            final String directoryValue = context.getProperty(DIRECTORY).evaluateAttributeExpressions(putFlowFile).getValue();
            // create the directory if it doesn't exist
            final Path directoryPath = new Path(directoryValue);
            createDirectory(fileSystem, directoryPath, remoteOwner, remoteGroup);
            // write to tempFile first and on success rename to destFile
            final Path tempFile = new Path(directoryPath, "." + filenameValue);
            final Path destFile = new Path(directoryPath, filenameValue);
            final boolean destinationExists = fileSystem.exists(destFile) || fileSystem.exists(tempFile);
            final boolean shouldOverwrite = context.getProperty(OVERWRITE).asBoolean();
            // if the tempFile or destFile already exist, and overwrite is set to false, then transfer to failure
            if (destinationExists && !shouldOverwrite) {
                session.transfer(session.penalize(putFlowFile), REL_FAILURE);
                getLogger().warn("penalizing {} and routing to failure because file with same name already exists", new Object[] { putFlowFile });
                return null;
            }
            final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
            final AtomicReference<WriteResult> writeResult = new AtomicReference<>();
            final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
            final FlowFile flowFileIn = putFlowFile;
            final StopWatch stopWatch = new StopWatch(true);
            // Read records from the incoming FlowFile and write them to the tempFile
            session.read(putFlowFile, (final InputStream rawIn) -> {
                RecordReader recordReader = null;
                HDFSRecordWriter recordWriter = null;
                try (final BufferedInputStream in = new BufferedInputStream(rawIn)) {
                    // handle this separately from the other IOExceptions which normally route to retry
                    try {
                        recordReader = recordReaderFactory.createRecordReader(flowFileIn, in, getLogger());
                    } catch (Exception e) {
                        final RecordReaderFactoryException rrfe = new RecordReaderFactoryException("Unable to create RecordReader", e);
                        exceptionHolder.set(rrfe);
                        return;
                    }
                    final RecordSet recordSet = recordReader.createRecordSet();
                    recordWriter = createHDFSRecordWriter(context, flowFile, configuration, tempFile, recordReader.getSchema());
                    writeResult.set(recordWriter.write(recordSet));
                } catch (Exception e) {
                    exceptionHolder.set(e);
                } finally {
                    IOUtils.closeQuietly(recordReader);
                    IOUtils.closeQuietly(recordWriter);
                }
            });
            stopWatch.stop();
            final String dataRate = stopWatch.calculateDataRate(putFlowFile.getSize());
            final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
            tempDotCopyFile = tempFile;
            // if any error was captured inside the session.read callback, re-throw it here so we jump
            // into one of the appropriate catch blocks below
            if (exceptionHolder.get() != null) {
                throw exceptionHolder.get();
            }
            // Attempt to rename from the tempFile to destFile, and change owner if successfully renamed
            rename(fileSystem, tempFile, destFile);
            changeOwner(fileSystem, destFile, remoteOwner, remoteGroup);
            getLogger().info("Wrote {} to {} in {} milliseconds at a rate of {}", new Object[] { putFlowFile, destFile, millis, dataRate });
            putFlowFile = postProcess(context, session, putFlowFile, destFile);
            final String newFilename = destFile.getName();
            final String hdfsPath = destFile.getParent().toString();
            // Update the filename and absolute path attributes
            final Map<String, String> attributes = new HashMap<>(writeResult.get().getAttributes());
            attributes.put(CoreAttributes.FILENAME.key(), newFilename);
            attributes.put(ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.get().getRecordCount()));
            putFlowFile = session.putAllAttributes(putFlowFile, attributes);
            // Send a provenance event and transfer to success
            final Path qualifiedPath = destFile.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
            session.getProvenanceReporter().send(putFlowFile, qualifiedPath.toString());
            session.transfer(putFlowFile, REL_SUCCESS);
        } catch (IOException | FlowFileAccessException e) {
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { e });
            session.transfer(session.penalize(putFlowFile), REL_RETRY);
            context.yield();
        } catch (Throwable t) {
            deleteQuietly(fileSystem, tempDotCopyFile);
            getLogger().error("Failed to write due to {}", new Object[] { t });
            session.transfer(putFlowFile, REL_FAILURE);
        }
        return null;
    });
}
Also used: Path(org.apache.hadoop.fs.Path), FlowFile(org.apache.nifi.flowfile.FlowFile), Configuration(org.apache.hadoop.conf.Configuration), BufferedInputStream(java.io.BufferedInputStream), InputStream(java.io.InputStream), RecordReader(org.apache.nifi.serialization.RecordReader), HDFSRecordWriter(org.apache.nifi.processors.hadoop.record.HDFSRecordWriter), AtomicReference(java.util.concurrent.atomic.AtomicReference), SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException), ProcessException(org.apache.nifi.processor.exception.ProcessException), RecordReaderFactoryException(org.apache.nifi.processors.hadoop.exception.RecordReaderFactoryException), FlowFileAccessException(org.apache.nifi.processor.exception.FlowFileAccessException), FailureException(org.apache.nifi.processors.hadoop.exception.FailureException), IOException(java.io.IOException), FileNotFoundException(java.io.FileNotFoundException), RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory), StopWatch(org.apache.nifi.util.StopWatch), FileSystem(org.apache.hadoop.fs.FileSystem), RecordSet(org.apache.nifi.serialization.record.RecordSet), HashMap(java.util.HashMap), Map(java.util.Map), UserGroupInformation(org.apache.hadoop.security.UserGroupInformation)
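
The load-bearing trick in this processor is the write-then-rename protocol: records are written to a dot-prefixed temp file, and the destination name only appears once the full content is on disk, so anything listing the directory never observes a partial file. A minimal sketch of the same protocol using java.nio.file against a local filesystem, since the Hadoop FileSystem API is not needed to illustrate it:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;

public class TempFileRenameSketch {

    static void publish(final Path directory, final String filename, final byte[] content) throws IOException {
        final Path tempFile = directory.resolve("." + filename); // hidden while in progress
        final Path destFile = directory.resolve(filename);
        try {
            Files.write(tempFile, content);
            // rename only after the full content is on disk; note that
            // ATOMIC_MOVE is not supported on every filesystem
            Files.move(tempFile, destFile, StandardCopyOption.ATOMIC_MOVE);
        } catch (final IOException e) {
            // mirror deleteQuietly above: a failed publish must not leave the temp file behind
            Files.deleteIfExists(tempFile);
            throw e;
        }
    }
}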

Example 14 with FlowFileAccessException

Use of org.apache.nifi.processor.exception.FlowFileAccessException in project nifi by apache.

The class StandardProcessSession, method importFrom:

@Override
public FlowFile importFrom(final Path source, final boolean keepSourceFile, FlowFile destination) {
    verifyTaskActive();
    destination = validateRecordState(destination);
    // TODO: find a better solution. With Windows 7 and Java 7 (very early update, at least), Files.isWritable(source.getParent()) returns false, even when it should be true.
    if (!keepSourceFile && !Files.isWritable(source.getParent()) && !source.getParent().toFile().canWrite()) {
        // If we do NOT want to keep the file, ensure that we can delete it, or else error.
        throw new FlowFileAccessException("Cannot write to path " + source.getParent().toFile().getAbsolutePath() + " so cannot delete file; will not import.");
    }
    final StandardRepositoryRecord record = records.get(destination);
    final ContentClaim newClaim;
    final long claimOffset;
    try {
        newClaim = context.getContentRepository().create(context.getConnectable().isLossTolerant());
        claimLog.debug("Creating ContentClaim {} for 'importFrom' for {}", newClaim, destination);
    } catch (final IOException e) {
        throw new FlowFileAccessException("Unable to create ContentClaim due to " + e.toString(), e);
    }
    claimOffset = 0L;
    long newSize = 0L;
    try {
        newSize = context.getContentRepository().importFrom(source, newClaim);
        bytesWritten += newSize;
        bytesRead += newSize;
    } catch (final Throwable t) {
        destroyContent(newClaim);
        throw new FlowFileAccessException("Failed to import data from " + source + " for " + destination + " due to " + t.toString(), t);
    }
    removeTemporaryClaim(record);
    final FlowFileRecord newFile = new StandardFlowFileRecord.Builder().fromFlowFile(record.getCurrent()).contentClaim(newClaim).contentClaimOffset(claimOffset).size(newSize).addAttribute(CoreAttributes.FILENAME.key(), source.toFile().getName()).build();
    record.setWorking(newFile, CoreAttributes.FILENAME.key(), source.toFile().getName());
    if (!keepSourceFile) {
        deleteOnCommit.put(newFile, source);
    }
    return newFile;
}
Also used: FlowFileAccessException(org.apache.nifi.processor.exception.FlowFileAccessException), ContentClaim(org.apache.nifi.controller.repository.claim.ContentClaim), ProvenanceEventBuilder(org.apache.nifi.provenance.ProvenanceEventBuilder), IOException(java.io.IOException)
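
Note the order of operations: deletability of the source is verified before any content is imported, because failing after the copy would strand bytes in a freshly created ContentClaim. A simplified sketch of the same contract, assuming a plain directory stands in for the content repository and using AccessException as a hypothetical stand-in for FlowFileAccessException:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

public class ImportFromSketch {

    // Hypothetical unchecked exception, analogous to FlowFileAccessException.
    static class AccessException extends RuntimeException {
        AccessException(final String message) {
            super(message);
        }

        AccessException(final String message, final Throwable cause) {
            super(message, cause);
        }
    }

    static long importFrom(final Path source, final boolean keepSourceFile, final Path repositoryFile) {
        // check up front that the source could be deleted afterwards
        if (!keepSourceFile && !Files.isWritable(source.getParent())) {
            throw new AccessException("Cannot write to path " + source.getParent() + " so cannot delete file; will not import.");
        }
        try {
            Files.copy(source, repositoryFile);
            final long newSize = Files.size(repositoryFile);
            if (!keepSourceFile) {
                Files.delete(source); // the real session defers this until commit via deleteOnCommit
            }
            return newSize;
        } catch (final IOException e) {
            throw new AccessException("Failed to import data from " + source, e);
        }
    }
}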

Example 15 with FlowFileAccessException

Use of org.apache.nifi.processor.exception.FlowFileAccessException in project nifi by apache.

The class StandardProcessSession, method write:

@Override
public FlowFile write(FlowFile source, final StreamCallback writer) {
    verifyTaskActive();
    source = validateRecordState(source);
    final StandardRepositoryRecord record = records.get(source);
    final ContentClaim currClaim = record.getCurrentClaim();
    long writtenToFlowFile = 0L;
    ContentClaim newClaim = null;
    try {
        newClaim = claimCache.getContentClaim();
        claimLog.debug("Creating ContentClaim {} for 'write' for {}", newClaim, source);
        ensureNotAppending(newClaim);
        if (currClaim != null) {
            claimCache.flush(currClaim.getResourceClaim());
        }
        try (final InputStream is = getInputStream(source, currClaim, record.getCurrentClaimOffset(), true);
            final InputStream limitedIn = new LimitedInputStream(is, source.getSize());
            final InputStream disableOnCloseIn = new DisableOnCloseInputStream(limitedIn);
            final ByteCountingInputStream countingIn = new ByteCountingInputStream(disableOnCloseIn, bytesRead);
            final OutputStream os = claimCache.write(newClaim);
            final OutputStream disableOnCloseOut = new DisableOnCloseOutputStream(os);
            final ByteCountingOutputStream countingOut = new ByteCountingOutputStream(disableOnCloseOut)) {
            writeRecursionSet.add(source);
            // We want to differentiate between IOExceptions thrown by the repository and IOExceptions thrown from
            // Processor code. As a result, we have the FlowFileAccessInputStream that catches IOException from the repository
            // and translates it into either FlowFileAccessException or ContentNotFoundException. We keep track of any
            // ContentNotFoundException because if it is thrown, the Processor code may catch it and do something else with it,
            // but in reality, if it is thrown, we want to know about it and handle it, even if the Processor code catches it.
            final FlowFileAccessInputStream ffais = new FlowFileAccessInputStream(countingIn, source, currClaim);
            final FlowFileAccessOutputStream ffaos = new FlowFileAccessOutputStream(countingOut, source);
            boolean cnfeThrown = false;
            try {
                writer.process(createTaskTerminationStream(ffais), createTaskTerminationStream(ffaos));
            } catch (final ContentNotFoundException cnfe) {
                cnfeThrown = true;
                throw cnfe;
            } finally {
                writtenToFlowFile = countingOut.getBytesWritten();
                this.bytesWritten += writtenToFlowFile;
                this.bytesRead += countingIn.getBytesRead();
                writeRecursionSet.remove(source);
                // if cnfeThrown is true, we don't need to re-throw the Exception; it will propagate.
                if (!cnfeThrown && ffais.getContentNotFoundException() != null) {
                    throw ffais.getContentNotFoundException();
                }
            }
        }
    } catch (final ContentNotFoundException nfe) {
        destroyContent(newClaim);
        handleContentNotFound(nfe, record);
    } catch (final IOException ioe) {
        destroyContent(newClaim);
        throw new ProcessException("IOException thrown from " + connectableDescription + ": " + ioe.toString(), ioe);
    } catch (final FlowFileAccessException ffae) {
        destroyContent(newClaim);
        throw ffae;
    } catch (final Throwable t) {
        destroyContent(newClaim);
        throw t;
    }
    removeTemporaryClaim(record);
    final FlowFileRecord newFile = new StandardFlowFileRecord.Builder().fromFlowFile(record.getCurrent()).contentClaim(newClaim).contentClaimOffset(Math.max(0L, newClaim.getLength() - writtenToFlowFile)).size(writtenToFlowFile).build();
    record.setWorking(newFile);
    return newFile;
}
Also used: FlowFileAccessOutputStream(org.apache.nifi.controller.repository.io.FlowFileAccessOutputStream), FlowFileAccessException(org.apache.nifi.processor.exception.FlowFileAccessException), ByteArrayInputStream(java.io.ByteArrayInputStream), TaskTerminationInputStream(org.apache.nifi.controller.repository.io.TaskTerminationInputStream), ByteCountingInputStream(org.apache.nifi.stream.io.ByteCountingInputStream), FlowFileAccessInputStream(org.apache.nifi.controller.repository.io.FlowFileAccessInputStream), LimitedInputStream(org.apache.nifi.controller.repository.io.LimitedInputStream), DisableOnCloseInputStream(org.apache.nifi.controller.repository.io.DisableOnCloseInputStream), InputStream(java.io.InputStream), ByteCountingOutputStream(org.apache.nifi.stream.io.ByteCountingOutputStream), DisableOnCloseOutputStream(org.apache.nifi.controller.repository.io.DisableOnCloseOutputStream), BufferedOutputStream(java.io.BufferedOutputStream), OutputStream(java.io.OutputStream), TaskTerminationOutputStream(org.apache.nifi.controller.repository.io.TaskTerminationOutputStream), IOException(java.io.IOException), ContentClaim(org.apache.nifi.controller.repository.claim.ContentClaim), ProcessException(org.apache.nifi.processor.exception.ProcessException)
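
Most of the bulk in this method is decorator layering: limiting, byte-counting, and disable-on-close wrappers are stacked around the raw claim streams so the session can meter I/O and survive a callback that closes the streams handed to it. A small sketch of just the counting layer, with CountingOutputStream as a hypothetical stand-in for NiFi's ByteCountingOutputStream:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

public class StreamLayeringSketch {

    static class CountingOutputStream extends FilterOutputStream {
        private long bytesWritten;

        CountingOutputStream(final OutputStream out) {
            super(out);
        }

        @Override
        public void write(final int b) throws IOException {
            out.write(b);
            bytesWritten++; // FilterOutputStream funnels bulk writes through here
        }

        long getBytesWritten() {
            return bytesWritten;
        }
    }

    public static void main(final String[] args) throws IOException {
        final InputStream in = new ByteArrayInputStream("hello".getBytes());
        final CountingOutputStream countingOut = new CountingOutputStream(new ByteArrayOutputStream());
        in.transferTo(countingOut); // stands in for the processor's StreamCallback (Java 9+)
        System.out.println("bytes written: " + countingOut.getBytesWritten()); // prints 5
    }
}

The session then reads the counter after the callback returns, just as write() above reads countingOut.getBytesWritten() in its finally block.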

Aggregations

FlowFileAccessException (org.apache.nifi.processor.exception.FlowFileAccessException): 25
IOException (java.io.IOException): 23
FlowFile (org.apache.nifi.flowfile.FlowFile): 11
ProcessException (org.apache.nifi.processor.exception.ProcessException): 11
InputStream (java.io.InputStream): 9
OutputStream (java.io.OutputStream): 7
ContentClaim (org.apache.nifi.controller.repository.claim.ContentClaim): 6
BufferedOutputStream (java.io.BufferedOutputStream): 5
ByteArrayInputStream (java.io.ByteArrayInputStream): 5
HashMap (java.util.HashMap): 5
DisableOnCloseInputStream (org.apache.nifi.controller.repository.io.DisableOnCloseInputStream): 5
FlowFileAccessInputStream (org.apache.nifi.controller.repository.io.FlowFileAccessInputStream): 5
LimitedInputStream (org.apache.nifi.controller.repository.io.LimitedInputStream): 5
TaskTerminationInputStream (org.apache.nifi.controller.repository.io.TaskTerminationInputStream): 5
ByteCountingInputStream (org.apache.nifi.stream.io.ByteCountingInputStream): 5
Map (java.util.Map): 4
AtomicReference (java.util.concurrent.atomic.AtomicReference): 4
DisableOnCloseOutputStream (org.apache.nifi.controller.repository.io.DisableOnCloseOutputStream): 4
FlowFileAccessOutputStream (org.apache.nifi.controller.repository.io.FlowFileAccessOutputStream): 4
TaskTerminationOutputStream (org.apache.nifi.controller.repository.io.TaskTerminationOutputStream): 4