Search in sources :

Example 41 with InputStreamCallback

use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

the class StandardRemoteGroupPort method transferFlowFiles.

/**
 * Sends {@code firstFlowFile}, followed by any further FlowFiles pulled from the session, over the
 * given transaction as a single batch.
 * <p>
 * After each FlowFile is sent, a provenance SEND event is reported and the FlowFile is removed from
 * the session. The batch ends when the preferred batch count, preferred batch size or batch duration
 * taken from the site-to-site client configuration is reached, or when the session has no more
 * FlowFiles. The transaction is then confirmed and completed, and the session is committed.
 *
 * @param transaction   the transaction used to send the data packets
 * @param context       the process context (not used by this method)
 * @param session       the session the FlowFiles are read from, removed from and committed on
 * @param firstFlowFile the first FlowFile to send
 * @return the number of distinct FlowFiles sent in this batch
 * @throws IOException       propagated after the session has been rolled back
 * @throws ProtocolException propagated after the session has been rolled back
 */
private int transferFlowFiles(final Transaction transaction, final ProcessContext context, final ProcessSession session, final FlowFile firstFlowFile) throws IOException, ProtocolException {
    try {
        final String remoteDn = transaction.getCommunicant().getDistinguishedName();
        final long batchStartNanos = System.nanoTime();
        final StopWatch uploadWatch = new StopWatch(true);

        // Batch limits; a non-positive count or size disables that limit, and a non-positive
        // duration falls back to BATCH_SEND_NANOS.
        final SiteToSiteClientConfig clientConfig = getSiteToSiteClient().getConfig();
        final long byteLimit = clientConfig.getPreferredBatchSize();
        final int countLimit = clientConfig.getPreferredBatchCount();
        final long configuredDuration = clientConfig.getPreferredBatchDuration(TimeUnit.NANOSECONDS);
        final long durationLimit;
        if (configuredDuration > 0) {
            durationLimit = configuredDuration;
        } else {
            durationLimit = BATCH_SEND_NANOS;
        }

        final Set<FlowFile> sent = new HashSet<>();
        long totalBytes = 0L;
        FlowFile current = firstFlowFile;

        do {
            final long sendStartNanos = System.nanoTime();

            // Send from inside a session callback so the FlowFile content is available as a stream.
            final FlowFile outgoing = current;
            session.read(outgoing, new InputStreamCallback() {

                @Override
                public void process(final InputStream in) throws IOException {
                    final DataPacket packet = new StandardDataPacket(outgoing.getAttributes(), in, outgoing.getSize());
                    transaction.send(packet);
                }
            });

            final long sendMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - sendStartNanos);

            sent.add(outgoing);
            totalBytes += outgoing.getSize();
            logger.debug("{} Sent {} to {}", this, outgoing, transaction.getCommunicant().getUrl());

            final String transitUri = transaction.getCommunicant().createTransitUri(outgoing.getAttribute(CoreAttributes.UUID.key()));
            session.getProvenanceReporter().send(outgoing, transitUri, "Remote DN=" + remoteDn, sendMillis, false);
            session.remove(outgoing);

            // Only poll the session for another FlowFile while no batch limit has been hit.
            final long elapsedNanos = System.nanoTime() - batchStartNanos;
            final boolean countReached = countLimit > 0 && sent.size() >= countLimit;
            final boolean bytesReached = byteLimit > 0 && totalBytes >= byteLimit;
            final boolean timeReached = elapsedNanos >= durationLimit;
            current = (countReached || bytesReached || timeReached) ? null : session.get();
        } while (current != null);

        transaction.confirm();

        uploadWatch.stop();
        final String uploadDataRate = uploadWatch.calculateDataRate(totalBytes);
        final long uploadMillis = uploadWatch.getDuration(TimeUnit.MILLISECONDS);
        final String dataSize = FormatUtils.formatDataSize(totalBytes);

        transaction.complete();
        session.commit();

        // Avoid dumping a long list of FlowFiles into the log.
        final String flowFileDescription;
        if (sent.size() < 20) {
            flowFileDescription = sent.toString();
        } else {
            flowFileDescription = sent.size() + " FlowFiles";
        }
        logger.info("{} Successfully sent {} ({}) to {} in {} milliseconds at a rate of {}", new Object[] { this, flowFileDescription, dataSize, transaction.getCommunicant().getUrl(), uploadMillis, uploadDataRate });
        return sent.size();
    } catch (final Exception e) {
        // Any failure undoes the session's work before the exception reaches the caller.
        session.rollback();
        throw e;
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) SiteToSiteClientConfig(org.apache.nifi.remote.client.SiteToSiteClientConfig) InputStream(java.io.InputStream) StandardDataPacket(org.apache.nifi.remote.util.StandardDataPacket) IOException(java.io.IOException) DataPacket(org.apache.nifi.remote.protocol.DataPacket) StandardDataPacket(org.apache.nifi.remote.util.StandardDataPacket) UnreachableClusterException(org.apache.nifi.remote.exception.UnreachableClusterException) ProtocolException(org.apache.nifi.remote.exception.ProtocolException) PortNotRunningException(org.apache.nifi.remote.exception.PortNotRunningException) UnknownPortException(org.apache.nifi.remote.exception.UnknownPortException) IOException(java.io.IOException) StopWatch(org.apache.nifi.util.StopWatch) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) HashSet(java.util.HashSet)

Example 42 with InputStreamCallback

use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

the class AbstractFlowFileServerProtocol method transferFlowFiles.

/**
 * Sends FlowFiles from the session to the given peer until a batch limit is reached or the
 * session has no more FlowFiles, then hands the result to {@code commitTransferTransaction}.
 * <p>
 * If the session has no FlowFile at all, a NO_MORE_DATA response is written and 0 is returned.
 * Otherwise a MORE_DATA response is written, each FlowFile is encoded with the supplied codec
 * through a CRC32-checked stream, and after every FlowFile either a CONTINUE_TRANSACTION or a
 * FINISH_TRANSACTION response is written.
 *
 * @param peer    the peer the FlowFiles are sent to
 * @param context the process context, passed on to the FlowFileTransaction
 * @param session the session FlowFiles are pulled from and removed from
 * @param codec   the codec used to encode each data packet onto the output stream
 * @return 0 when there was nothing to send, otherwise the value returned by
 *         {@code commitTransferTransaction}
 * @throws IOException           declared by this method; may come from the stream and response writes
 * @throws ProtocolException     declared by this method
 * @throws IllegalStateException if the handshake has not completed or the protocol is shut down
 */
@Override
public int transferFlowFiles(final Peer peer, final ProcessContext context, final ProcessSession session, final FlowFileCodec codec) throws IOException, ProtocolException {
    if (!handshakeCompleted) {
        throw new IllegalStateException("Handshake has not been completed");
    }
    if (shutdown) {
        throw new IllegalStateException("Protocol is shutdown");
    }
    logger.debug("{} Sending FlowFiles to {}", this, peer);
    final CommunicationsSession commsSession = peer.getCommunicationsSession();
    // The remote DN is only used in the provenance details; "none" stands in when it is absent.
    String remoteDn = commsSession.getUserDn();
    if (remoteDn == null) {
        remoteDn = "none";
    }
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        // we have no data to send. Notify the peer.
        logger.debug("{} No data to send to {}", this, peer);
        writeTransactionResponse(true, ResponseCode.NO_MORE_DATA, commsSession);
        return 0;
    }
    // we have data to send.
    logger.debug("{} Data is available to send to {}", this, peer);
    writeTransactionResponse(true, ResponseCode.MORE_DATA, commsSession);
    final StopWatch stopWatch = new StopWatch(true);
    long bytesSent = 0L;
    final Set<FlowFile> flowFilesSent = new HashSet<>();
    // A single CRC32 is shared by every per-FlowFile CheckedOutputStream below, so the checksum
    // accumulates over all bytes written through those streams in this transaction.
    final CRC32 crc = new CRC32();
    // send data until we reach some batch size
    boolean continueTransaction = true;
    final long startNanos = System.nanoTime();
    String calculatedCRC = "";
    OutputStream os = new DataOutputStream(commsSession.getOutput().getOutputStream());
    while (continueTransaction) {
        // When gzip is enabled each FlowFile gets its own CompressionOutputStream around the shared stream.
        final boolean useGzip = handshakeProperties.isUseGzip();
        final OutputStream flowFileOutputStream = useGzip ? new CompressionOutputStream(os) : os;
        logger.debug("{} Sending {} to {}", new Object[] { this, flowFile, peer });
        final CheckedOutputStream checkedOutputStream = new CheckedOutputStream(flowFileOutputStream, crc);
        final StopWatch transferWatch = new StopWatch(true);
        // Final copy so the anonymous callback can reference the FlowFile being sent.
        final FlowFile toSend = flowFile;
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(final InputStream in) throws IOException {
                final DataPacket dataPacket = new StandardDataPacket(toSend.getAttributes(), in, toSend.getSize());
                codec.encode(dataPacket, checkedOutputStream);
            }
        });
        final long transmissionMillis = transferWatch.getElapsed(TimeUnit.MILLISECONDS);
        // Close the per-FlowFile stream only when it wraps a CompressionOutputStream
        // (CompressionOutputStream will not close the underlying stream when it's closed).
        // Without gzip the checked stream wraps the shared stream directly, so it is left open.
        if (useGzip) {
            checkedOutputStream.close();
        }
        flowFilesSent.add(flowFile);
        bytesSent += flowFile.getSize();
        final String transitUri = createTransitUri(peer, flowFile.getAttribute(CoreAttributes.UUID.key()));
        session.getProvenanceReporter().send(flowFile, transitUri, "Remote Host=" + peer.getHost() + ", Remote DN=" + remoteDn, transmissionMillis, false);
        session.remove(flowFile);
        // determine if we should check for more data on queue.
        final long sendingNanos = System.nanoTime() - startNanos;
        boolean poll = true;
        // Each limit below is only enforced when it is greater than zero.
        double batchDurationNanos = handshakeProperties.getBatchDurationNanos();
        if (sendingNanos >= batchDurationNanos && batchDurationNanos > 0L) {
            poll = false;
        }
        double batchBytes = handshakeProperties.getBatchBytes();
        if (bytesSent >= batchBytes && batchBytes > 0L) {
            poll = false;
        }
        double batchCount = handshakeProperties.getBatchCount();
        if (flowFilesSent.size() >= batchCount && batchCount > 0) {
            poll = false;
        }
        // With no limit configured at all, fall back to the default batch duration.
        if (batchDurationNanos == 0 && batchBytes == 0 && batchCount == 0) {
            poll = (sendingNanos < DEFAULT_BATCH_NANOS);
        }
        if (poll) {
            // we've not elapsed the requested sending duration, so get more data.
            flowFile = session.get();
        } else {
            flowFile = null;
        }
        continueTransaction = (flowFile != null);
        if (continueTransaction) {
            logger.debug("{} Sending ContinueTransaction indicator to {}", this, peer);
            writeTransactionResponse(true, ResponseCode.CONTINUE_TRANSACTION, commsSession);
        } else {
            logger.debug("{} Sending FinishTransaction indicator to {}", this, peer);
            writeTransactionResponse(true, ResponseCode.FINISH_TRANSACTION, commsSession);
            // Capture the accumulated checksum once the last FlowFile has been written.
            calculatedCRC = String.valueOf(checkedOutputStream.getChecksum().getValue());
        }
    }
    FlowFileTransaction transaction = new FlowFileTransaction(session, context, stopWatch, bytesSent, flowFilesSent, calculatedCRC);
    return commitTransferTransaction(peer, transaction);
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) CompressionOutputStream(org.apache.nifi.remote.io.CompressionOutputStream) CRC32(java.util.zip.CRC32) DataOutputStream(java.io.DataOutputStream) DataInputStream(java.io.DataInputStream) CheckedInputStream(java.util.zip.CheckedInputStream) CompressionInputStream(org.apache.nifi.remote.io.CompressionInputStream) InputStream(java.io.InputStream) StandardDataPacket(org.apache.nifi.remote.util.StandardDataPacket) CompressionOutputStream(org.apache.nifi.remote.io.CompressionOutputStream) DataOutputStream(java.io.DataOutputStream) OutputStream(java.io.OutputStream) CheckedOutputStream(java.util.zip.CheckedOutputStream) IOException(java.io.IOException) StandardDataPacket(org.apache.nifi.remote.util.StandardDataPacket) StopWatch(org.apache.nifi.util.StopWatch) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) CheckedOutputStream(java.util.zip.CheckedOutputStream) HashSet(java.util.HashSet)

Example 43 with InputStreamCallback

use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

the class GetCouchbaseKey method onTrigger.

/**
 * Fetches a Couchbase document for the incoming FlowFile and emits its content as a new FlowFile.
 * <p>
 * The document id comes from the {@code DOC_ID} property (evaluated against the FlowFile) when
 * that property is set; otherwise the FlowFile content, decoded as UTF-8, is used as the id.
 * When the document is found, a child FlowFile holding the document content and Couchbase
 * attributes goes to {@code REL_SUCCESS} and the incoming FlowFile goes to {@code REL_ORIGINAL}.
 * When no document is obtained, the incoming FlowFile is tagged with an exception attribute and
 * goes to {@code REL_FAILURE}. A {@code CouchbaseException} is delegated to
 * {@code handleCouchbaseException}.
 *
 * @param context the process context supplying the property values
 * @param session the session providing and receiving FlowFiles
 * @throws ProcessException if no document id could be determined
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile inFile = session.get();
    if (inFile == null) {
        return;
    }

    final long startNanos = System.nanoTime();
    final ComponentLog logger = getLogger();

    // Resolve the document id: the configured property wins, the FlowFile content is the fallback.
    final String docId;
    if (StringUtils.isEmpty(context.getProperty(DOC_ID).getValue())) {
        final byte[] idBytes = new byte[(int) inFile.getSize()];
        session.read(inFile, new InputStreamCallback() {

            @Override
            public void process(final InputStream in) throws IOException {
                StreamUtils.fillBuffer(in, idBytes, true);
            }
        });
        docId = new String(idBytes, StandardCharsets.UTF_8);
    } else {
        docId = context.getProperty(DOC_ID).evaluateAttributeExpressions(inFile).getValue();
    }

    if (StringUtils.isEmpty(docId)) {
        throw new ProcessException("Please check 'Document Id' setting. Couldn't get document id from " + inFile);
    }

    try {
        final Document<?> doc;
        final byte[] content;
        final Bucket bucket = openBucket(context);
        final DocumentType documentType = DocumentType.valueOf(context.getProperty(DOCUMENT_TYPE).getValue());

        // Both doc and content stay null when the lookup returns nothing or the type is unhandled.
        switch (documentType) {
            case Json: {
                final RawJsonDocument jsonDocument = bucket.get(docId, RawJsonDocument.class);
                content = (jsonDocument == null) ? null : jsonDocument.content().getBytes(StandardCharsets.UTF_8);
                doc = jsonDocument;
                break;
            }
            case Binary: {
                final BinaryDocument binaryDocument = bucket.get(docId, BinaryDocument.class);
                content = (binaryDocument == null) ? null : binaryDocument.content().array();
                doc = binaryDocument;
                break;
            }
            default: {
                doc = null;
                content = null;
            }
        }

        if (doc == null) {
            logger.error("Document {} was not found in {}; routing {} to failure", new Object[] { docId, getTransitUrl(context, docId), inFile });
            inFile = session.putAttribute(inFile, CouchbaseAttributes.Exception.key(), DocumentDoesNotExistException.class.getName());
            session.transfer(inFile, REL_FAILURE);
            return;
        }

        // Write the fetched bytes into a child of the incoming FlowFile.
        FlowFile outFile = session.create(inFile);
        outFile = session.write(outFile, new OutputStreamCallback() {

            @Override
            public void process(final OutputStream out) throws IOException {
                out.write(content);
            }
        });

        final Map<String, String> couchbaseAttrs = new HashMap<>();
        couchbaseAttrs.put(CouchbaseAttributes.Cluster.key(), context.getProperty(COUCHBASE_CLUSTER_SERVICE).getValue());
        couchbaseAttrs.put(CouchbaseAttributes.Bucket.key(), context.getProperty(BUCKET_NAME).getValue());
        couchbaseAttrs.put(CouchbaseAttributes.DocId.key(), docId);
        couchbaseAttrs.put(CouchbaseAttributes.Cas.key(), String.valueOf(doc.cas()));
        couchbaseAttrs.put(CouchbaseAttributes.Expiry.key(), String.valueOf(doc.expiry()));
        outFile = session.putAllAttributes(outFile, couchbaseAttrs);

        final long elapsedNanos = System.nanoTime() - startNanos;
        final long fetchMillis = TimeUnit.MILLISECONDS.convert(elapsedNanos, TimeUnit.NANOSECONDS);
        session.getProvenanceReporter().fetch(outFile, getTransitUrl(context, docId), fetchMillis);

        session.transfer(outFile, REL_SUCCESS);
        session.transfer(inFile, REL_ORIGINAL);
    } catch (final CouchbaseException e) {
        final String errMsg = String.format("Getting document %s from Couchbase Server using %s failed due to %s", docId, inFile, e);
        handleCouchbaseException(context, session, logger, inFile, e, errMsg);
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) HashMap(java.util.HashMap) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) RawJsonDocument(com.couchbase.client.java.document.RawJsonDocument) BinaryDocument(com.couchbase.client.java.document.BinaryDocument) ProcessException(org.apache.nifi.processor.exception.ProcessException) CouchbaseException(com.couchbase.client.core.CouchbaseException) Bucket(com.couchbase.client.java.Bucket) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback)

Example 44 with InputStreamCallback

use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

the class FuzzyHashContent method onTrigger.

/**
 * Computes a fuzzy hash of the FlowFile content and stores it in the attribute named by
 * {@code ATTRIBUTE_NAME}.
 * <p>
 * FlowFiles rejected by {@code checkMinimumAlgorithmRequirements} go straight to
 * {@code REL_FAILURE}. Otherwise the whole content is buffered in memory, hashed with the
 * algorithm from {@code HASH_ALGORITHM}, and the FlowFile is routed to {@code REL_SUCCESS}.
 * An {@code InsufficientComplexityException} or {@code ProcessException} raised along the way
 * routes the FlowFile to {@code REL_FAILURE}.
 *
 * @param context the process context supplying the property values
 * @param session the session providing and receiving the FlowFile
 * @throws ProcessException declared by the processor contract
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final ComponentLog logger = getLogger();
    final String algorithm = context.getProperty(HASH_ALGORITHM).getValue();

    if (!checkMinimumAlgorithmRequirements(algorithm, flowFile)) {
        logger.error("The content of '{}' is smaller than the minimum required by {}, routing to failure", new Object[] { flowFile, algorithm });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    // Stays null when the generated hash is blank.
    final AtomicReference<String> computedHash = new AtomicReference<>(null);
    try {
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(final InputStream in) throws IOException {
                try (ByteArrayOutputStream buffered = new ByteArrayOutputStream()) {
                    StreamUtils.copy(in, buffered);
                    // NOTE(review): toString() decodes with the platform default charset; confirm
                    // how generateHash re-encodes the string before pinning an explicit charset.
                    final String candidate = generateHash(algorithm, buffered.toString());
                    if (!StringUtils.isBlank(candidate)) {
                        computedHash.set(candidate);
                    }
                }
            }
        });

        final String attributeName = context.getProperty(ATTRIBUTE_NAME).getValue();
        flowFile = session.putAttribute(flowFile, attributeName, computedHash.get());
        logger.info("Successfully added attribute '{}' to {} with a value of {}; routing to success", new Object[] { attributeName, flowFile, computedHash.get() });
        session.getProvenanceReporter().modifyAttributes(flowFile);
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final InsufficientComplexityException | ProcessException e) {
        logger.error("Failed to process {} due to {}; routing to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) ByteArrayOutputStream(java.io.ByteArrayOutputStream) InsufficientComplexityException(com.idealista.tlsh.exceptions.InsufficientComplexityException) ComponentLog(org.apache.nifi.logging.ComponentLog) ProcessException(org.apache.nifi.processor.exception.ProcessException) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback)

Example 45 with InputStreamCallback

use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

the class ExtractEmailHeaders method onTrigger.

/**
 * Parses the incoming FlowFile as a MIME email message and copies its headers into FlowFile
 * attributes.
 * <p>
 * A message without a "From" header or without a sent date is treated as invalid. On any
 * parsing error the collected attributes are discarded and the FlowFile is routed to
 * {@code REL_FAILURE}; otherwise the attributes are added and the FlowFile is routed to
 * {@code REL_SUCCESS}.
 * <p>
 * Header names are matched case-insensitively against the colon-separated
 * {@code CAPTURED_HEADERS} property. Lower-casing uses {@code Locale.ROOT}, so matching and the
 * generated {@code email.headers.*} attribute names do not depend on the JVM default locale.
 *
 * @param context the process context supplying the property values
 * @param session the session providing and receiving the FlowFile
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final ComponentLog logger = getLogger();
    final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
    final List<FlowFile> processedFlowFilesList = new ArrayList<>();
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        return;
    }
    final String requireStrictAddresses = context.getProperty(STRICT_PARSING).getValue();
    // Locale.ROOT (fully qualified, so no extra import is needed) keeps case folding stable:
    // under e.g. a Turkish default locale "ID" would otherwise lower-case to "ıd".
    final List<String> capturedHeadersList = Arrays.asList(context.getProperty(CAPTURED_HEADERS).getValue().toLowerCase(java.util.Locale.ROOT).split(":"));
    final Map<String, String> attributes = new HashMap<>();
    session.read(originalFlowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                Properties props = new Properties();
                props.put("mail.mime.address.strict", requireStrictAddresses);
                Session mailSession = Session.getInstance(props);
                MimeMessage originalMessage = new MimeMessage(mailSession, in);
                MimeMessageParser parser = new MimeMessageParser(originalMessage).parse();
                // RFC-2822 determines that a message must have a "From:" header
                // if a message lacks the field, it is flagged as invalid
                Address[] from = originalMessage.getFrom();
                if (from == null) {
                    throw new MessagingException("Message failed RFC-2822 validation: No Sender");
                }
                Date sentDate = originalMessage.getSentDate();
                if (sentDate == null) {
                    // Throws MessageException due to lack of minimum required headers
                    throw new MessagingException("Message failed RFC-2822 validation: No Sent Date");
                } else if (!capturedHeadersList.isEmpty()) {
                    // Wildcard instead of a raw type; elements are cast to Header below.
                    Enumeration<?> headers = originalMessage.getAllHeaders();
                    while (headers.hasMoreElements()) {
                        Header header = (Header) headers.nextElement();
                        if (StringUtils.isNotEmpty(header.getValue())) {
                            final String headerName = header.getName().toLowerCase(java.util.Locale.ROOT);
                            if (capturedHeadersList.contains(headerName)) {
                                attributes.put("email.headers." + headerName, header.getValue());
                            }
                        }
                    }
                }
                putAddressListInAttributes(attributes, EMAIL_HEADER_TO, originalMessage.getRecipients(Message.RecipientType.TO));
                putAddressListInAttributes(attributes, EMAIL_HEADER_CC, originalMessage.getRecipients(Message.RecipientType.CC));
                putAddressListInAttributes(attributes, EMAIL_HEADER_BCC, originalMessage.getRecipients(Message.RecipientType.BCC));
                // RFC-2822 specifies "From" as mailbox-list
                putAddressListInAttributes(attributes, EMAIL_HEADER_FROM, originalMessage.getFrom());
                if (StringUtils.isNotEmpty(originalMessage.getMessageID())) {
                    attributes.put(EMAIL_HEADER_MESSAGE_ID, originalMessage.getMessageID());
                }
                if (originalMessage.getReceivedDate() != null) {
                    attributes.put(EMAIL_HEADER_RECV_DATE, originalMessage.getReceivedDate().toString());
                }
                if (originalMessage.getSentDate() != null) {
                    attributes.put(EMAIL_HEADER_SENT_DATE, originalMessage.getSentDate().toString());
                }
                if (StringUtils.isNotEmpty(originalMessage.getSubject())) {
                    attributes.put(EMAIL_HEADER_SUBJECT, originalMessage.getSubject());
                }
                // Zeroes EMAIL_ATTACHMENT_COUNT
                attributes.put(EMAIL_ATTACHMENT_COUNT, "0");
                // But insert correct value if attachments are present
                if (parser.hasAttachments()) {
                    attributes.put(EMAIL_ATTACHMENT_COUNT, String.valueOf(parser.getAttachmentList().size()));
                }
            } catch (Exception e) {
                // Message is invalid or triggered an error during parsing; drop any partially
                // collected attributes so the FlowFile is not also routed to success.
                attributes.clear();
                logger.error("Could not parse the flowfile {} as an email, treating as failure", new Object[] { originalFlowFile, e });
                invalidFlowFilesList.add(originalFlowFile);
            }
        }
    });
    // A non-empty attribute map means parsing succeeded (the catch block clears it on failure).
    if (attributes.size() > 0) {
        FlowFile updatedFlowFile = session.putAllAttributes(originalFlowFile, attributes);
        logger.info("Extracted {} headers into {} file", new Object[] { attributes.size(), updatedFlowFile });
        processedFlowFilesList.add(updatedFlowFile);
    }
    session.transfer(processedFlowFilesList, REL_SUCCESS);
    session.transfer(invalidFlowFilesList, REL_FAILURE);
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) Enumeration(java.util.Enumeration) HashMap(java.util.HashMap) MessagingException(javax.mail.MessagingException) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Properties(java.util.Properties) MimeMessageParser(org.apache.commons.mail.util.MimeMessageParser) ComponentLog(org.apache.nifi.logging.ComponentLog) Date(java.util.Date) MessagingException(javax.mail.MessagingException) IOException(java.io.IOException) Header(javax.mail.Header) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) MimeMessage(javax.mail.internet.MimeMessage) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) ProcessSession(org.apache.nifi.processor.ProcessSession) Session(javax.mail.Session)

Aggregations

IOException (java.io.IOException)80 InputStream (java.io.InputStream)80 InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback)80 FlowFile (org.apache.nifi.flowfile.FlowFile)62 ProcessException (org.apache.nifi.processor.exception.ProcessException)35 ComponentLog (org.apache.nifi.logging.ComponentLog)27 HashMap (java.util.HashMap)25 AtomicReference (java.util.concurrent.atomic.AtomicReference)23 OutputStream (java.io.OutputStream)19 BufferedInputStream (java.io.BufferedInputStream)18 ArrayList (java.util.ArrayList)17 Map (java.util.Map)17 OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback)13 ByteArrayOutputStream (java.io.ByteArrayOutputStream)11 BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream)10 StopWatch (org.apache.nifi.util.StopWatch)10 HashSet (java.util.HashSet)9 Charset (java.nio.charset.Charset)8 FileInputStream (java.io.FileInputStream)7 ProcessSession (org.apache.nifi.processor.ProcessSession)7