Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
The class StandardRemoteGroupPort, method transferFlowFiles.
private int transferFlowFiles(final Transaction transaction, final ProcessContext context, final ProcessSession session, final FlowFile firstFlowFile) throws IOException, ProtocolException {
    FlowFile flowFile = firstFlowFile;
    try {
        final String userDn = transaction.getCommunicant().getDistinguishedName();
        final long startSendingNanos = System.nanoTime();
        final StopWatch stopWatch = new StopWatch(true);
        long bytesSent = 0L;
        final SiteToSiteClientConfig siteToSiteClientConfig = getSiteToSiteClient().getConfig();
        final long maxBatchBytes = siteToSiteClientConfig.getPreferredBatchSize();
        final int maxBatchCount = siteToSiteClientConfig.getPreferredBatchCount();
        final long preferredBatchDuration = siteToSiteClientConfig.getPreferredBatchDuration(TimeUnit.NANOSECONDS);
        final long maxBatchDuration = preferredBatchDuration > 0 ? preferredBatchDuration : BATCH_SEND_NANOS;
        final Set<FlowFile> flowFilesSent = new HashSet<>();
        boolean continueTransaction = true;
        while (continueTransaction) {
            final long startNanos = System.nanoTime();
            // send the FlowFile within a session callback so that we have an InputStream over its content
            final FlowFile toWrap = flowFile;
            session.read(flowFile, new InputStreamCallback() {
                @Override
                public void process(final InputStream in) throws IOException {
                    final DataPacket dataPacket = new StandardDataPacket(toWrap.getAttributes(), in, toWrap.getSize());
                    transaction.send(dataPacket);
                }
            });
            final long transferNanos = System.nanoTime() - startNanos;
            final long transferMillis = TimeUnit.MILLISECONDS.convert(transferNanos, TimeUnit.NANOSECONDS);
            flowFilesSent.add(flowFile);
            bytesSent += flowFile.getSize();
            logger.debug("{} Sent {} to {}", this, flowFile, transaction.getCommunicant().getUrl());
            final String transitUri = transaction.getCommunicant().createTransitUri(flowFile.getAttribute(CoreAttributes.UUID.key()));
            session.getProvenanceReporter().send(flowFile, transitUri, "Remote DN=" + userDn, transferMillis, false);
            session.remove(flowFile);
            // decide whether to keep the batch open: stop on the count, byte, or duration limit
            final long sendingNanos = System.nanoTime() - startSendingNanos;
            if (maxBatchCount > 0 && flowFilesSent.size() >= maxBatchCount) {
                flowFile = null;
            } else if (maxBatchBytes > 0 && bytesSent >= maxBatchBytes) {
                flowFile = null;
            } else if (sendingNanos >= maxBatchDuration) {
                flowFile = null;
            } else {
                flowFile = session.get();
            }
            continueTransaction = (flowFile != null);
        }
        transaction.confirm();
        stopWatch.stop();
        final String uploadDataRate = stopWatch.calculateDataRate(bytesSent);
        final long uploadMillis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
        final String dataSize = FormatUtils.formatDataSize(bytesSent);
        transaction.complete();
        session.commit();
        final String flowFileDescription = (flowFilesSent.size() < 20) ? flowFilesSent.toString() : flowFilesSent.size() + " FlowFiles";
        logger.info("{} Successfully sent {} ({}) to {} in {} milliseconds at a rate of {}", new Object[] { this, flowFileDescription, dataSize, transaction.getCommunicant().getUrl(), uploadMillis, uploadDataRate });
        return flowFilesSent.size();
    } catch (final Exception e) {
        session.rollback();
        throw e;
    }
}
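All of the examples in this listing share the same shape: a FlowFile's content is only reachable through the ProcessSession, so the processor hands an InputStreamCallback to session.read(...) and does its work inside process(InputStream). Because the callback cannot return a value, results are usually captured in a holder declared outside it. The sketch below is not taken from the NiFi codebase; it is a minimal illustration of the pattern, with a hypothetical attribute name and a REL_SUCCESS relationship assumed to be defined by the surrounding processor. Since InputStreamCallback declares a single process method, the same call can also be written as a Java 8 lambda: session.read(flowFile, in -> { ... }).

// Minimal sketch (hypothetical, not from any of the processors above): count the
// bytes of the FlowFile's content inside an InputStreamCallback and record the
// result as an attribute. The AtomicLong is the usual way to get a value out of
// the callback, which cannot return one directly.
FlowFile flowFile = session.get();
if (flowFile == null) {
    return;
}
final AtomicLong byteCount = new AtomicLong(0L);
session.read(flowFile, new InputStreamCallback() {
    @Override
    public void process(final InputStream in) throws IOException {
        final byte[] buffer = new byte[8192];
        int len;
        while ((len = in.read(buffer)) != -1) {
            byteCount.addAndGet(len);
        }
    }
});
// "content.byte.count" is a hypothetical attribute name chosen for this sketch
flowFile = session.putAttribute(flowFile, "content.byte.count", String.valueOf(byteCount.get()));
session.transfer(flowFile, REL_SUCCESS);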
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
The class AbstractFlowFileServerProtocol, method transferFlowFiles.
@Override
public int transferFlowFiles(final Peer peer, final ProcessContext context, final ProcessSession session, final FlowFileCodec codec) throws IOException, ProtocolException {
    if (!handshakeCompleted) {
        throw new IllegalStateException("Handshake has not been completed");
    }
    if (shutdown) {
        throw new IllegalStateException("Protocol is shutdown");
    }
    logger.debug("{} Sending FlowFiles to {}", this, peer);
    final CommunicationsSession commsSession = peer.getCommunicationsSession();
    String remoteDn = commsSession.getUserDn();
    if (remoteDn == null) {
        remoteDn = "none";
    }
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        // we have no data to send. Notify the peer.
        logger.debug("{} No data to send to {}", this, peer);
        writeTransactionResponse(true, ResponseCode.NO_MORE_DATA, commsSession);
        return 0;
    }
    // we have data to send.
    logger.debug("{} Data is available to send to {}", this, peer);
    writeTransactionResponse(true, ResponseCode.MORE_DATA, commsSession);
    final StopWatch stopWatch = new StopWatch(true);
    long bytesSent = 0L;
    final Set<FlowFile> flowFilesSent = new HashSet<>();
    final CRC32 crc = new CRC32();
    // send data until we reach some batch size
    boolean continueTransaction = true;
    final long startNanos = System.nanoTime();
    String calculatedCRC = "";
    OutputStream os = new DataOutputStream(commsSession.getOutput().getOutputStream());
    while (continueTransaction) {
        final boolean useGzip = handshakeProperties.isUseGzip();
        final OutputStream flowFileOutputStream = useGzip ? new CompressionOutputStream(os) : os;
        logger.debug("{} Sending {} to {}", new Object[] { this, flowFile, peer });
        final CheckedOutputStream checkedOutputStream = new CheckedOutputStream(flowFileOutputStream, crc);
        final StopWatch transferWatch = new StopWatch(true);
        // call codec.encode within a session callback so that we have an InputStream over the FlowFile content
        final FlowFile toSend = flowFile;
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(final InputStream in) throws IOException {
                final DataPacket dataPacket = new StandardDataPacket(toSend.getAttributes(), in, toSend.getSize());
                codec.encode(dataPacket, checkedOutputStream);
            }
        });
        final long transmissionMillis = transferWatch.getElapsed(TimeUnit.MILLISECONDS);
        // if we were compressing, close the CompressionOutputStream to finish the compressed data
        // (CompressionOutputStream will not close the underlying stream when it's closed)
        if (useGzip) {
            checkedOutputStream.close();
        }
        flowFilesSent.add(flowFile);
        bytesSent += flowFile.getSize();
        final String transitUri = createTransitUri(peer, flowFile.getAttribute(CoreAttributes.UUID.key()));
        session.getProvenanceReporter().send(flowFile, transitUri, "Remote Host=" + peer.getHost() + ", Remote DN=" + remoteDn, transmissionMillis, false);
        session.remove(flowFile);
        // determine if we should check for more data on the queue.
        final long sendingNanos = System.nanoTime() - startNanos;
        boolean poll = true;
        final long batchDurationNanos = handshakeProperties.getBatchDurationNanos();
        if (sendingNanos >= batchDurationNanos && batchDurationNanos > 0L) {
            poll = false;
        }
        final long batchBytes = handshakeProperties.getBatchBytes();
        if (bytesSent >= batchBytes && batchBytes > 0L) {
            poll = false;
        }
        final long batchCount = handshakeProperties.getBatchCount();
        if (flowFilesSent.size() >= batchCount && batchCount > 0) {
            poll = false;
        }
        if (batchDurationNanos == 0 && batchBytes == 0 && batchCount == 0) {
            poll = (sendingNanos < DEFAULT_BATCH_NANOS);
        }
        if (poll) {
            // we've not elapsed the requested sending duration, so get more data.
            flowFile = session.get();
        } else {
            flowFile = null;
        }
        continueTransaction = (flowFile != null);
        if (continueTransaction) {
            logger.debug("{} Sending ContinueTransaction indicator to {}", this, peer);
            writeTransactionResponse(true, ResponseCode.CONTINUE_TRANSACTION, commsSession);
        } else {
            logger.debug("{} Sending FinishTransaction indicator to {}", this, peer);
            writeTransactionResponse(true, ResponseCode.FINISH_TRANSACTION, commsSession);
            calculatedCRC = String.valueOf(checkedOutputStream.getChecksum().getValue());
        }
    }
    FlowFileTransaction transaction = new FlowFileTransaction(session, context, stopWatch, bytesSent, flowFilesSent, calculatedCRC);
    return commitTransferTransaction(peer, transaction);
}
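The poll/no-poll decision in the loop above combines three optional limits (batch duration, bytes, and FlowFile count, where 0 means the limit is unset) with a default duration when none are configured. The helper below is a condensed restatement of that logic, not the NiFi implementation; the method name shouldPollForMore and the plain long parameters are assumptions made for this sketch.

// Hedged sketch: returns true when the batch limits allow pulling another FlowFile
// from the queue. A limit of 0 means "not configured".
private static boolean shouldPollForMore(final long sendingNanos, final long batchDurationNanos,
        final long bytesSent, final long batchBytes,
        final int flowFilesSent, final long batchCount,
        final long defaultBatchNanos) {
    if (batchDurationNanos == 0 && batchBytes == 0 && batchCount == 0) {
        // no explicit limits configured: fall back to the default batch duration
        return sendingNanos < defaultBatchNanos;
    }
    if (batchDurationNanos > 0 && sendingNanos >= batchDurationNanos) {
        return false;
    }
    if (batchBytes > 0 && bytesSent >= batchBytes) {
        return false;
    }
    if (batchCount > 0 && flowFilesSent >= batchCount) {
        return false;
    }
    return true;
}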
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
The class GetCouchbaseKey, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile inFile = session.get();
    if (inFile == null) {
        return;
    }
    final long startNanos = System.nanoTime();
    final ComponentLog logger = getLogger();
    String docId = null;
    if (!StringUtils.isEmpty(context.getProperty(DOC_ID).getValue())) {
        docId = context.getProperty(DOC_ID).evaluateAttributeExpressions(inFile).getValue();
    } else {
        final byte[] content = new byte[(int) inFile.getSize()];
        session.read(inFile, new InputStreamCallback() {
            @Override
            public void process(final InputStream in) throws IOException {
                StreamUtils.fillBuffer(in, content, true);
            }
        });
        docId = new String(content, StandardCharsets.UTF_8);
    }
    if (StringUtils.isEmpty(docId)) {
        throw new ProcessException("Please check 'Document Id' setting. Couldn't get document id from " + inFile);
    }
    try {
        final Document<?> doc;
        final byte[] content;
        final Bucket bucket = openBucket(context);
        final DocumentType documentType = DocumentType.valueOf(context.getProperty(DOCUMENT_TYPE).getValue());
        switch (documentType) {
            case Json: {
                RawJsonDocument document = bucket.get(docId, RawJsonDocument.class);
                if (document == null) {
                    doc = null;
                    content = null;
                } else {
                    content = document.content().getBytes(StandardCharsets.UTF_8);
                    doc = document;
                }
                break;
            }
            case Binary: {
                BinaryDocument document = bucket.get(docId, BinaryDocument.class);
                if (document == null) {
                    doc = null;
                    content = null;
                } else {
                    content = document.content().array();
                    doc = document;
                }
                break;
            }
            default: {
                doc = null;
                content = null;
            }
        }
        if (doc == null) {
            logger.error("Document {} was not found in {}; routing {} to failure", new Object[] { docId, getTransitUrl(context, docId), inFile });
            inFile = session.putAttribute(inFile, CouchbaseAttributes.Exception.key(), DocumentDoesNotExistException.class.getName());
            session.transfer(inFile, REL_FAILURE);
            return;
        }
        FlowFile outFile = session.create(inFile);
        outFile = session.write(outFile, new OutputStreamCallback() {
            @Override
            public void process(final OutputStream out) throws IOException {
                out.write(content);
            }
        });
        final Map<String, String> updatedAttrs = new HashMap<>();
        updatedAttrs.put(CouchbaseAttributes.Cluster.key(), context.getProperty(COUCHBASE_CLUSTER_SERVICE).getValue());
        updatedAttrs.put(CouchbaseAttributes.Bucket.key(), context.getProperty(BUCKET_NAME).getValue());
        updatedAttrs.put(CouchbaseAttributes.DocId.key(), docId);
        updatedAttrs.put(CouchbaseAttributes.Cas.key(), String.valueOf(doc.cas()));
        updatedAttrs.put(CouchbaseAttributes.Expiry.key(), String.valueOf(doc.expiry()));
        outFile = session.putAllAttributes(outFile, updatedAttrs);
        final long fetchMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
        session.getProvenanceReporter().fetch(outFile, getTransitUrl(context, docId), fetchMillis);
        session.transfer(outFile, REL_SUCCESS);
        session.transfer(inFile, REL_ORIGINAL);
    } catch (final CouchbaseException e) {
        String errMsg = String.format("Getting document %s from Couchbase Server using %s failed due to %s", docId, inFile, e);
        handleCouchbaseException(context, session, logger, inFile, e, errMsg);
    }
}
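GetCouchbaseKey uses both callback directions: an InputStreamCallback to read the incoming FlowFile (its content becomes the document id when the Document Id property is not set) and an OutputStreamCallback to write the fetched document into a child FlowFile. The sketch below strips that read-then-write pairing down to its skeleton; the Couchbase lookup is replaced by a placeholder, and REL_SUCCESS / REL_ORIGINAL are assumed to be relationships defined by the surrounding processor.

// Hedged sketch of the read-then-write pairing. Assumes the incoming content is
// small enough to buffer in memory (the size is cast to int, as in the processor above).
FlowFile inFile = session.get();
if (inFile == null) {
    return;
}
final byte[] content = new byte[(int) inFile.getSize()];
session.read(inFile, new InputStreamCallback() {
    @Override
    public void process(final InputStream in) throws IOException {
        StreamUtils.fillBuffer(in, content, true);
    }
});
// placeholder for whatever lookup or transformation produces the outgoing payload
final byte[] derived = content;
FlowFile outFile = session.create(inFile);
outFile = session.write(outFile, new OutputStreamCallback() {
    @Override
    public void process(final OutputStream out) throws IOException {
        out.write(derived);
    }
});
session.transfer(outFile, REL_SUCCESS);
session.transfer(inFile, REL_ORIGINAL);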
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
The class FuzzyHashContent, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    String algorithm = context.getProperty(HASH_ALGORITHM).getValue();
    if (checkMinimumAlgorithmRequirements(algorithm, flowFile) == false) {
        logger.error("The content of '{}' is smaller than the minimum required by {}, routing to failure", new Object[] { flowFile, algorithm });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final AtomicReference<String> hashValueHolder = new AtomicReference<>(null);
    try {
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(final InputStream in) throws IOException {
                try (ByteArrayOutputStream holder = new ByteArrayOutputStream()) {
                    StreamUtils.copy(in, holder);
                    String hashValue = generateHash(algorithm, holder.toString());
                    if (StringUtils.isBlank(hashValue) == false) {
                        hashValueHolder.set(hashValue);
                    }
                }
            }
        });
        final String attributeName = context.getProperty(ATTRIBUTE_NAME).getValue();
        flowFile = session.putAttribute(flowFile, attributeName, hashValueHolder.get());
        logger.info("Successfully added attribute '{}' to {} with a value of {}; routing to success", new Object[] { attributeName, flowFile, hashValueHolder.get() });
        session.getProvenanceReporter().modifyAttributes(flowFile);
        session.transfer(flowFile, REL_SUCCESS);
    } catch (final InsufficientComplexityException | ProcessException e) {
        logger.error("Failed to process {} due to {}; routing to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
    }
}
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
The class ExtractEmailHeaders, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final ComponentLog logger = getLogger();
    final List<FlowFile> invalidFlowFilesList = new ArrayList<>();
    final List<FlowFile> processedFlowFilesList = new ArrayList<>();
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        return;
    }
    final String requireStrictAddresses = context.getProperty(STRICT_PARSING).getValue();
    final List<String> capturedHeadersList = Arrays.asList(context.getProperty(CAPTURED_HEADERS).getValue().toLowerCase().split(":"));
    final Map<String, String> attributes = new HashMap<>();
    session.read(originalFlowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                Properties props = new Properties();
                props.put("mail.mime.address.strict", requireStrictAddresses);
                Session mailSession = Session.getInstance(props);
                MimeMessage originalMessage = new MimeMessage(mailSession, in);
                MimeMessageParser parser = new MimeMessageParser(originalMessage).parse();
                // RFC 2822 requires that a message have a "From:" header;
                // if a message lacks the field, it is flagged as invalid
                Address[] from = originalMessage.getFrom();
                if (from == null) {
                    throw new MessagingException("Message failed RFC-2822 validation: No Sender");
                }
                Date sentDate = originalMessage.getSentDate();
                if (sentDate == null) {
                    // Throws MessagingException due to lack of minimum required headers
                    throw new MessagingException("Message failed RFC-2822 validation: No Sent Date");
                } else if (capturedHeadersList.size() > 0) {
                    Enumeration headers = originalMessage.getAllHeaders();
                    while (headers.hasMoreElements()) {
                        Header header = (Header) headers.nextElement();
                        if (StringUtils.isNotEmpty(header.getValue()) && capturedHeadersList.contains(header.getName().toLowerCase())) {
                            attributes.put("email.headers." + header.getName().toLowerCase(), header.getValue());
                        }
                    }
                }
                putAddressListInAttributes(attributes, EMAIL_HEADER_TO, originalMessage.getRecipients(Message.RecipientType.TO));
                putAddressListInAttributes(attributes, EMAIL_HEADER_CC, originalMessage.getRecipients(Message.RecipientType.CC));
                putAddressListInAttributes(attributes, EMAIL_HEADER_BCC, originalMessage.getRecipients(Message.RecipientType.BCC));
                // RFC 2822 specifies "From" as a mailbox-list
                putAddressListInAttributes(attributes, EMAIL_HEADER_FROM, originalMessage.getFrom());
                if (StringUtils.isNotEmpty(originalMessage.getMessageID())) {
                    attributes.put(EMAIL_HEADER_MESSAGE_ID, originalMessage.getMessageID());
                }
                if (originalMessage.getReceivedDate() != null) {
                    attributes.put(EMAIL_HEADER_RECV_DATE, originalMessage.getReceivedDate().toString());
                }
                if (originalMessage.getSentDate() != null) {
                    attributes.put(EMAIL_HEADER_SENT_DATE, originalMessage.getSentDate().toString());
                }
                if (StringUtils.isNotEmpty(originalMessage.getSubject())) {
                    attributes.put(EMAIL_HEADER_SUBJECT, originalMessage.getSubject());
                }
                // Default EMAIL_ATTACHMENT_COUNT to zero...
                attributes.put(EMAIL_ATTACHMENT_COUNT, "0");
                // ...but insert the correct value if attachments are present
                if (parser.hasAttachments()) {
                    attributes.put(EMAIL_ATTACHMENT_COUNT, String.valueOf(parser.getAttachmentList().size()));
                }
            } catch (Exception e) {
                // Message is invalid or triggered an error during parsing
                attributes.clear();
                logger.error("Could not parse the flowfile {} as an email, treating as failure", new Object[] { originalFlowFile, e });
                invalidFlowFilesList.add(originalFlowFile);
            }
        }
    });
    if (attributes.size() > 0) {
        FlowFile updatedFlowFile = session.putAllAttributes(originalFlowFile, attributes);
        logger.info("Extracted {} headers into {} file", new Object[] { attributes.size(), updatedFlowFile });
        processedFlowFilesList.add(updatedFlowFile);
    }
    session.transfer(processedFlowFilesList, REL_SUCCESS);
    session.transfer(invalidFlowFilesList, REL_FAILURE);
}
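ExtractEmailHeaders illustrates one more aspect of the pattern: the callback cannot modify or route the FlowFile itself, so parse results (the attributes map) and failures (invalidFlowFilesList) are collected in variables declared outside the callback and acted on after session.read(...) returns. Below is a minimal, hedged sketch of that capture-then-route shape; parseHeaders(in) is a hypothetical stand-in for the MimeMessageParser work done above, and REL_SUCCESS / REL_FAILURE are assumed processor relationships.

// Hedged sketch: record the outcome inside the callback, route afterwards.
final Map<String, String> attributes = new HashMap<>();
final AtomicBoolean parseFailed = new AtomicBoolean(false);
session.read(flowFile, new InputStreamCallback() {
    @Override
    public void process(final InputStream in) throws IOException {
        try {
            attributes.putAll(parseHeaders(in)); // hypothetical parser returning header attributes
        } catch (final Exception e) {
            attributes.clear();
            parseFailed.set(true);
        }
    }
});
if (parseFailed.get()) {
    session.transfer(flowFile, REL_FAILURE);
} else {
    session.transfer(session.putAllAttributes(flowFile, attributes), REL_SUCCESS);
}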