Use of org.apache.nifi.util.StopWatch in project nifi by apache: class StandardRemoteGroupPort, method transferFlowFiles.
private int transferFlowFiles(final Transaction transaction, final ProcessContext context, final ProcessSession session, final FlowFile firstFlowFile) throws IOException, ProtocolException {
FlowFile flowFile = firstFlowFile;
try {
final String userDn = transaction.getCommunicant().getDistinguishedName();
final long startSendingNanos = System.nanoTime();
final StopWatch stopWatch = new StopWatch(true);
long bytesSent = 0L;
final SiteToSiteClientConfig siteToSiteClientConfig = getSiteToSiteClient().getConfig();
final long maxBatchBytes = siteToSiteClientConfig.getPreferredBatchSize();
final int maxBatchCount = siteToSiteClientConfig.getPreferredBatchCount();
final long preferredBatchDuration = siteToSiteClientConfig.getPreferredBatchDuration(TimeUnit.NANOSECONDS);
final long maxBatchDuration = preferredBatchDuration > 0 ? preferredBatchDuration : BATCH_SEND_NANOS;
final Set<FlowFile> flowFilesSent = new HashSet<>();
boolean continueTransaction = true;
while (continueTransaction) {
final long startNanos = System.nanoTime();
// read the FlowFile content within a session callback so that we have an InputStream to wrap in the DataPacket passed to transaction.send
final FlowFile toWrap = flowFile;
session.read(flowFile, new InputStreamCallback() {
@Override
public void process(final InputStream in) throws IOException {
final DataPacket dataPacket = new StandardDataPacket(toWrap.getAttributes(), in, toWrap.getSize());
transaction.send(dataPacket);
}
});
final long transferNanos = System.nanoTime() - startNanos;
final long transferMillis = TimeUnit.MILLISECONDS.convert(transferNanos, TimeUnit.NANOSECONDS);
flowFilesSent.add(flowFile);
bytesSent += flowFile.getSize();
logger.debug("{} Sent {} to {}", this, flowFile, transaction.getCommunicant().getUrl());
final String transitUri = transaction.getCommunicant().createTransitUri(flowFile.getAttribute(CoreAttributes.UUID.key()));
session.getProvenanceReporter().send(flowFile, transitUri, "Remote DN=" + userDn, transferMillis, false);
session.remove(flowFile);
final long sendingNanos = System.nanoTime() - startSendingNanos;
if (maxBatchCount > 0 && flowFilesSent.size() >= maxBatchCount) {
flowFile = null;
} else if (maxBatchBytes > 0 && bytesSent >= maxBatchBytes) {
flowFile = null;
} else if (sendingNanos >= maxBatchDuration) {
flowFile = null;
} else {
flowFile = session.get();
}
continueTransaction = (flowFile != null);
}
transaction.confirm();
// the peer has confirmed the transaction; stop the timer and compute the metrics used for logging below
stopWatch.stop();
final String uploadDataRate = stopWatch.calculateDataRate(bytesSent);
final long uploadMillis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
final String dataSize = FormatUtils.formatDataSize(bytesSent);
transaction.complete();
session.commit();
final String flowFileDescription = (flowFilesSent.size() < 20) ? flowFilesSent.toString() : flowFilesSent.size() + " FlowFiles";
logger.info("{} Successfully sent {} ({}) to {} in {} milliseconds at a rate of {}", new Object[] { this, flowFileDescription, dataSize, transaction.getCommunicant().getUrl(), uploadMillis, uploadDataRate });
return flowFilesSent.size();
} catch (final Exception e) {
session.rollback();
throw e;
}
}
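The examples in this section share the same timing pattern. As a quick reference, here is a minimal, self-contained sketch (not taken from the NiFi source) of that pattern: StopWatch(true) starts timing immediately, getElapsed() reads the running clock without stopping it, stop() freezes it, and getDuration()/calculateDataRate() produce the values the methods above feed into their log messages and provenance events.

import java.util.concurrent.TimeUnit;
import org.apache.nifi.util.StopWatch;

public class StopWatchSketch {
    public static void main(final String[] args) throws InterruptedException {
        final StopWatch stopWatch = new StopWatch(true); // true = start timing immediately
        long bytesSent = 0L;
        for (int i = 0; i < 3; i++) {
            Thread.sleep(10); // stand-in for sending one FlowFile
            bytesSent += 1024;
        }
        stopWatch.stop();
        final long uploadMillis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
        final String uploadDataRate = stopWatch.calculateDataRate(bytesSent);
        System.out.println("Sent " + bytesSent + " bytes in " + uploadMillis + " ms at " + uploadDataRate);
    }
}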
Use of org.apache.nifi.util.StopWatch in project nifi by apache: class StandardRemoteGroupPort, method receiveFlowFiles.
private int receiveFlowFiles(final Transaction transaction, final ProcessContext context, final ProcessSession session) throws IOException, ProtocolException {
final String userDn = transaction.getCommunicant().getDistinguishedName();
final StopWatch stopWatch = new StopWatch(true);
final Set<FlowFile> flowFilesReceived = new HashSet<>();
long bytesReceived = 0L;
while (true) {
final long start = System.nanoTime();
final DataPacket dataPacket = transaction.receive();
if (dataPacket == null) {
break;
}
FlowFile flowFile = session.create();
flowFile = session.putAllAttributes(flowFile, dataPacket.getAttributes());
final Communicant communicant = transaction.getCommunicant();
final String host = StringUtils.isEmpty(communicant.getHost()) ? "unknown" : communicant.getHost();
final String port = communicant.getPort() < 0 ? "unknown" : String.valueOf(communicant.getPort());
final Map<String, String> attributes = new HashMap<>(2);
attributes.put(SiteToSiteAttributes.S2S_HOST.key(), host);
attributes.put(SiteToSiteAttributes.S2S_ADDRESS.key(), host + ":" + port);
flowFile = session.putAllAttributes(flowFile, attributes);
flowFile = session.importFrom(dataPacket.getData(), flowFile);
final long receiveNanos = System.nanoTime() - start;
flowFilesReceived.add(flowFile);
String sourceFlowFileIdentifier = dataPacket.getAttributes().get(CoreAttributes.UUID.key());
if (sourceFlowFileIdentifier == null) {
sourceFlowFileIdentifier = "<Unknown Identifier>";
}
final String transitUri = transaction.getCommunicant().createTransitUri(sourceFlowFileIdentifier);
session.getProvenanceReporter().receive(flowFile, transitUri, "urn:nifi:" + sourceFlowFileIdentifier, "Remote DN=" + userDn, TimeUnit.NANOSECONDS.toMillis(receiveNanos));
session.transfer(flowFile, Relationship.ANONYMOUS);
bytesReceived += dataPacket.getSize();
}
// Confirm that what we received was the correct data.
transaction.confirm();
// Commit the session so that we have persisted the data
session.commit();
transaction.complete();
if (!flowFilesReceived.isEmpty()) {
stopWatch.stop();
final String flowFileDescription = flowFilesReceived.size() < 20 ? flowFilesReceived.toString() : flowFilesReceived.size() + " FlowFiles";
final String uploadDataRate = stopWatch.calculateDataRate(bytesReceived);
final long uploadMillis = stopWatch.getDuration(TimeUnit.MILLISECONDS);
final String dataSize = FormatUtils.formatDataSize(bytesReceived);
logger.info("{} Successfully received {} ({}) from {} in {} milliseconds at a rate of {}", new Object[] { this, flowFileDescription, dataSize, transaction.getCommunicant().getUrl(), uploadMillis, uploadDataRate });
}
return flowFilesReceived.size();
}
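The host and port attributes added to each received FlowFile above come from the remote Communicant, with "unknown" used as a fallback. Below is a small illustrative sketch (plain Java; the literal attribute keys are assumptions, the real code uses the SiteToSiteAttributes constants) of that mapping.

import java.util.HashMap;
import java.util.Map;

public class SiteToSiteAttributeSketch {
    // builds the same two attributes the method above puts on each received FlowFile
    static Map<String, String> buildAttributes(final String communicantHost, final int communicantPort) {
        final String host = (communicantHost == null || communicantHost.isEmpty()) ? "unknown" : communicantHost;
        final String port = communicantPort < 0 ? "unknown" : String.valueOf(communicantPort);
        final Map<String, String> attributes = new HashMap<>(2);
        attributes.put("s2s.host", host); // assumed key for SiteToSiteAttributes.S2S_HOST
        attributes.put("s2s.address", host + ":" + port); // assumed key for SiteToSiteAttributes.S2S_ADDRESS
        return attributes;
    }

    public static void main(final String[] args) {
        System.out.println(buildAttributes("nifi-node-1", 8443)); // host and host:port are filled in
        System.out.println(buildAttributes("", -1)); // both values fall back to "unknown"
    }
}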
Use of org.apache.nifi.util.StopWatch in project nifi by apache: class AbstractFlowFileServerProtocol, method receiveFlowFiles.
@Override
public int receiveFlowFiles(final Peer peer, final ProcessContext context, final ProcessSession session, final FlowFileCodec codec) throws IOException, ProtocolException {
if (!handshakeCompleted) {
throw new IllegalStateException("Handshake has not been completed");
}
if (shutdown) {
throw new IllegalStateException("Protocol is shutdown");
}
logger.debug("{} receiving FlowFiles from {}", this, peer);
final CommunicationsSession commsSession = peer.getCommunicationsSession();
final DataInputStream dis = new DataInputStream(commsSession.getInput().getInputStream());
String remoteDn = commsSession.getUserDn();
if (remoteDn == null) {
remoteDn = "none";
}
final StopWatch stopWatch = new StopWatch(true);
final CRC32 crc = new CRC32();
// Peer has data. Otherwise, we would not have been called, because they would not have sent
// a SEND_FLOWFILES request to us. Just decode the bytes into FlowFiles until the peer says it has
// finished sending data.
final Set<FlowFile> flowFilesReceived = new HashSet<>();
long bytesReceived = 0L;
boolean continueTransaction = true;
while (continueTransaction) {
final long startNanos = System.nanoTime();
final InputStream flowFileInputStream = handshakeProperties.isUseGzip() ? new CompressionInputStream(dis) : dis;
final CheckedInputStream checkedInputStream = new CheckedInputStream(flowFileInputStream, crc);
final DataPacket dataPacket = codec.decode(checkedInputStream);
if (dataPacket == null) {
logger.debug("{} Received null dataPacket indicating the end of transaction from {}", this, peer);
break;
}
FlowFile flowFile = session.create();
flowFile = session.importFrom(dataPacket.getData(), flowFile);
flowFile = session.putAllAttributes(flowFile, dataPacket.getAttributes());
if (handshakeProperties.isUseGzip()) {
// Close CompressionInputStream to free acquired memory, without closing underlying stream.
checkedInputStream.close();
}
final long transferNanos = System.nanoTime() - startNanos;
final long transferMillis = TimeUnit.MILLISECONDS.convert(transferNanos, TimeUnit.NANOSECONDS);
final String sourceSystemFlowFileUuid = dataPacket.getAttributes().get(CoreAttributes.UUID.key());
final String host = StringUtils.isEmpty(peer.getHost()) ? "unknown" : peer.getHost();
final String port = peer.getPort() <= 0 ? "unknown" : String.valueOf(peer.getPort());
final Map<String, String> attributes = new HashMap<>(4);
attributes.put(CoreAttributes.UUID.key(), UUID.randomUUID().toString());
attributes.put(SiteToSiteAttributes.S2S_HOST.key(), host);
attributes.put(SiteToSiteAttributes.S2S_ADDRESS.key(), host + ":" + port);
flowFile = session.putAllAttributes(flowFile, attributes);
final String transitUri = createTransitUri(peer, sourceSystemFlowFileUuid);
session.getProvenanceReporter().receive(flowFile, transitUri, sourceSystemFlowFileUuid == null ? null : "urn:nifi:" + sourceSystemFlowFileUuid, "Remote Host=" + peer.getHost() + ", Remote DN=" + remoteDn, transferMillis);
session.transfer(flowFile, Relationship.ANONYMOUS);
flowFilesReceived.add(flowFile);
bytesReceived += flowFile.getSize();
final Response transactionResponse = readTransactionResponse(false, commsSession);
switch(transactionResponse.getCode()) {
case CONTINUE_TRANSACTION:
logger.debug("{} Received ContinueTransaction indicator from {}", this, peer);
break;
case FINISH_TRANSACTION:
logger.debug("{} Received FinishTransaction indicator from {}", this, peer);
continueTransaction = false;
break;
case CANCEL_TRANSACTION:
logger.info("{} Received CancelTransaction indicator from {} with explanation {}", this, peer, transactionResponse.getMessage());
session.rollback();
return 0;
default:
throw new ProtocolException("Received unexpected response from peer: when expecting Continue Transaction or Finish Transaction, received " + transactionResponse);
}
}
// we received a FINISH_TRANSACTION indicator. Send back a CONFIRM_TRANSACTION message
// to peer so that we can verify that the connection is still open. This is a two-phase commit,
// which helps to prevent the chances of data duplication. Without doing this, we may commit the
// session and then when we send the response back to the peer, the peer may have timed out and may not
// be listening. As a result, it will re-send the data. By doing this two-phase commit, we narrow the
// Critical Section involved in this transaction so that rather than the Critical Section being the
// time window involved in the entire transaction, it is reduced to a simple round-trip conversation.
logger.debug("{} Sending CONFIRM_TRANSACTION Response Code to {}", this, peer);
String calculatedCRC = String.valueOf(crc.getValue());
writeTransactionResponse(false, ResponseCode.CONFIRM_TRANSACTION, commsSession, calculatedCRC);
FlowFileTransaction transaction = new FlowFileTransaction(session, context, stopWatch, bytesReceived, flowFilesReceived, calculatedCRC);
return commitReceiveTransaction(peer, transaction);
}
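The CRC that receiveFlowFiles sends back with CONFIRM_TRANSACTION is accumulated by wrapping the incoming stream in a java.util.zip.CheckedInputStream. A minimal sketch of that mechanism using only JDK classes follows (the payload here is invented for illustration):

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;
import java.util.zip.CheckedInputStream;

public class CrcConfirmSketch {
    public static void main(final String[] args) throws IOException {
        final CRC32 crc = new CRC32();
        final byte[] payload = "flowfile-content".getBytes(StandardCharsets.UTF_8); // stand-in for encoded FlowFiles
        try (CheckedInputStream in = new CheckedInputStream(new ByteArrayInputStream(payload), crc)) {
            final byte[] buffer = new byte[64];
            while (in.read(buffer) != -1) {
                // decoding would happen here; every byte read also updates the shared CRC32
            }
        }
        final String calculatedCRC = String.valueOf(crc.getValue());
        System.out.println("CRC to send with CONFIRM_TRANSACTION: " + calculatedCRC);
    }
}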
Use of org.apache.nifi.util.StopWatch in project nifi by apache: class AbstractFlowFileServerProtocol, method transferFlowFiles.
@Override
public int transferFlowFiles(final Peer peer, final ProcessContext context, final ProcessSession session, final FlowFileCodec codec) throws IOException, ProtocolException {
if (!handshakeCompleted) {
throw new IllegalStateException("Handshake has not been completed");
}
if (shutdown) {
throw new IllegalStateException("Protocol is shutdown");
}
logger.debug("{} Sending FlowFiles to {}", this, peer);
final CommunicationsSession commsSession = peer.getCommunicationsSession();
String remoteDn = commsSession.getUserDn();
if (remoteDn == null) {
remoteDn = "none";
}
FlowFile flowFile = session.get();
if (flowFile == null) {
// we have no data to send. Notify the peer.
logger.debug("{} No data to send to {}", this, peer);
writeTransactionResponse(true, ResponseCode.NO_MORE_DATA, commsSession);
return 0;
}
// we have data to send.
logger.debug("{} Data is available to send to {}", this, peer);
writeTransactionResponse(true, ResponseCode.MORE_DATA, commsSession);
final StopWatch stopWatch = new StopWatch(true);
long bytesSent = 0L;
final Set<FlowFile> flowFilesSent = new HashSet<>();
final CRC32 crc = new CRC32();
// send data until we reach some batch size
boolean continueTransaction = true;
final long startNanos = System.nanoTime();
String calculatedCRC = "";
OutputStream os = new DataOutputStream(commsSession.getOutput().getOutputStream());
while (continueTransaction) {
final boolean useGzip = handshakeProperties.isUseGzip();
final OutputStream flowFileOutputStream = useGzip ? new CompressionOutputStream(os) : os;
logger.debug("{} Sending {} to {}", new Object[] { this, flowFile, peer });
final CheckedOutputStream checkedOutputStream = new CheckedOutputStream(flowFileOutputStream, crc);
final StopWatch transferWatch = new StopWatch(true);
final FlowFile toSend = flowFile;
session.read(flowFile, new InputStreamCallback() {
@Override
public void process(final InputStream in) throws IOException {
final DataPacket dataPacket = new StandardDataPacket(toSend.getAttributes(), in, toSend.getSize());
codec.encode(dataPacket, checkedOutputStream);
}
});
final long transmissionMillis = transferWatch.getElapsed(TimeUnit.MILLISECONDS);
// (CompressionOutputStream will not close the underlying stream when it's closed)
if (useGzip) {
checkedOutputStream.close();
}
flowFilesSent.add(flowFile);
bytesSent += flowFile.getSize();
final String transitUri = createTransitUri(peer, flowFile.getAttribute(CoreAttributes.UUID.key()));
session.getProvenanceReporter().send(flowFile, transitUri, "Remote Host=" + peer.getHost() + ", Remote DN=" + remoteDn, transmissionMillis, false);
session.remove(flowFile);
// determine if we should check for more data on queue.
final long sendingNanos = System.nanoTime() - startNanos;
boolean poll = true;
double batchDurationNanos = handshakeProperties.getBatchDurationNanos();
if (sendingNanos >= batchDurationNanos && batchDurationNanos > 0L) {
poll = false;
}
double batchBytes = handshakeProperties.getBatchBytes();
if (bytesSent >= batchBytes && batchBytes > 0L) {
poll = false;
}
double batchCount = handshakeProperties.getBatchCount();
if (flowFilesSent.size() >= batchCount && batchCount > 0) {
poll = false;
}
if (batchDurationNanos == 0 && batchBytes == 0 && batchCount == 0) {
poll = (sendingNanos < DEFAULT_BATCH_NANOS);
}
if (poll) {
// we've not elapsed the requested sending duration, so get more data.
flowFile = session.get();
} else {
flowFile = null;
}
continueTransaction = (flowFile != null);
if (continueTransaction) {
logger.debug("{} Sending ContinueTransaction indicator to {}", this, peer);
writeTransactionResponse(true, ResponseCode.CONTINUE_TRANSACTION, commsSession);
} else {
logger.debug("{} Sending FinishTransaction indicator to {}", this, peer);
writeTransactionResponse(true, ResponseCode.FINISH_TRANSACTION, commsSession);
calculatedCRC = String.valueOf(checkedOutputStream.getChecksum().getValue());
}
}
FlowFileTransaction transaction = new FlowFileTransaction(session, context, stopWatch, bytesSent, flowFilesSent, calculatedCRC);
return commitTransferTransaction(peer, transaction);
}
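The bottom of the send loop above decides whether to poll the queue for another FlowFile based on three optional limits (duration, bytes, count), with a default duration cap when none is configured. The hypothetical helper below restates that decision on its own; the DEFAULT_BATCH_NANOS value is an assumption for the sketch, not necessarily the value NiFi uses.

import java.util.concurrent.TimeUnit;

public class BatchLimitSketch {

    static final long DEFAULT_BATCH_NANOS = TimeUnit.SECONDS.toNanos(5); // assumed default for this sketch

    // returns true when the loop should call session.get() for another FlowFile
    static boolean shouldPollForMore(final long sendingNanos, final long bytesSent, final int flowFilesSent,
            final long batchDurationNanos, final long batchBytes, final int batchCount) {
        boolean poll = true;
        if (batchDurationNanos > 0 && sendingNanos >= batchDurationNanos) {
            poll = false;
        }
        if (batchBytes > 0 && bytesSent >= batchBytes) {
            poll = false;
        }
        if (batchCount > 0 && flowFilesSent >= batchCount) {
            poll = false;
        }
        if (batchDurationNanos == 0 && batchBytes == 0 && batchCount == 0) {
            poll = sendingNanos < DEFAULT_BATCH_NANOS; // no limits configured: cap the batch by a default duration
        }
        return poll;
    }

    public static void main(final String[] args) {
        // one second into the batch, 100 FlowFiles sent, count limit of 100: stop polling
        System.out.println(shouldPollForMore(TimeUnit.SECONDS.toNanos(1), 2_000_000L, 100, 0L, 0L, 100));
    }
}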
Use of org.apache.nifi.util.StopWatch in project nifi by apache: class QueryCassandra, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
FlowFile fileToProcess = null;
if (context.hasIncomingConnection()) {
fileToProcess = session.get();
// If we have no FlowFile but there are incoming connections from other processors, we should run only when a FlowFile arrives, so return and wait.
if (fileToProcess == null && context.hasNonLoopConnection()) {
return;
}
}
final ComponentLog logger = getLogger();
final String selectQuery = context.getProperty(CQL_SELECT_QUERY).evaluateAttributeExpressions(fileToProcess).getValue();
final long queryTimeout = context.getProperty(QUERY_TIMEOUT).evaluateAttributeExpressions(fileToProcess).asTimePeriod(TimeUnit.MILLISECONDS);
final String outputFormat = context.getProperty(OUTPUT_FORMAT).getValue();
final Charset charset = Charset.forName(context.getProperty(CHARSET).evaluateAttributeExpressions(fileToProcess).getValue());
final StopWatch stopWatch = new StopWatch(true);
if (fileToProcess == null) {
fileToProcess = session.create();
}
try {
// The documentation for the driver recommends the session remain open the entire time the processor is running
// and states that it is thread-safe. This is why connectionSession is not in a try-with-resources.
final Session connectionSession = cassandraSession.get();
final ResultSetFuture queryFuture = connectionSession.executeAsync(selectQuery);
final AtomicLong nrOfRows = new AtomicLong(0L);
fileToProcess = session.write(fileToProcess, new OutputStreamCallback() {
@Override
public void process(final OutputStream out) throws IOException {
try {
logger.debug("Executing CQL query {}", new Object[] { selectQuery });
final ResultSet resultSet;
if (queryTimeout > 0) {
resultSet = queryFuture.getUninterruptibly(queryTimeout, TimeUnit.MILLISECONDS);
if (AVRO_FORMAT.equals(outputFormat)) {
nrOfRows.set(convertToAvroStream(resultSet, out, queryTimeout, TimeUnit.MILLISECONDS));
} else if (JSON_FORMAT.equals(outputFormat)) {
nrOfRows.set(convertToJsonStream(resultSet, out, charset, queryTimeout, TimeUnit.MILLISECONDS));
}
} else {
resultSet = queryFuture.getUninterruptibly();
if (AVRO_FORMAT.equals(outputFormat)) {
nrOfRows.set(convertToAvroStream(resultSet, out, 0, null));
} else if (JSON_FORMAT.equals(outputFormat)) {
nrOfRows.set(convertToJsonStream(resultSet, out, charset, 0, null));
}
}
} catch (final TimeoutException | InterruptedException | ExecutionException e) {
throw new ProcessException(e);
}
}
});
// set attribute how many rows were selected
fileToProcess = session.putAttribute(fileToProcess, RESULT_ROW_COUNT, String.valueOf(nrOfRows.get()));
// set mime.type based on output format
fileToProcess = session.putAttribute(fileToProcess, CoreAttributes.MIME_TYPE.key(), JSON_FORMAT.equals(outputFormat) ? "application/json" : "application/avro-binary");
logger.info("{} contains {} Avro records; transferring to 'success'", new Object[] { fileToProcess, nrOfRows.get() });
session.getProvenanceReporter().modifyContent(fileToProcess, "Retrieved " + nrOfRows.get() + " rows", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
session.transfer(fileToProcess, REL_SUCCESS);
} catch (final NoHostAvailableException nhae) {
getLogger().error("No host in the Cassandra cluster can be contacted successfully to execute this query", nhae);
// Log up to 10 error messages. Otherwise if a 1000-node cluster was specified but there was no connectivity,
// a thousand error messages would be logged. However we would like information from Cassandra itself, so
// cap the error limit at 10, format the messages, and don't include the stack trace (it is displayed by the
// logger message above).
getLogger().error(nhae.getCustomMessage(10, true, false));
fileToProcess = session.penalize(fileToProcess);
session.transfer(fileToProcess, REL_RETRY);
} catch (final QueryExecutionException qee) {
logger.error("Cannot execute the query with the requested consistency level successfully", qee);
fileToProcess = session.penalize(fileToProcess);
session.transfer(fileToProcess, REL_RETRY);
} catch (final QueryValidationException qve) {
if (context.hasIncomingConnection()) {
logger.error("The CQL query {} is invalid due to syntax error, authorization issue, or another " + "validation problem; routing {} to failure", new Object[] { selectQuery, fileToProcess }, qve);
fileToProcess = session.penalize(fileToProcess);
session.transfer(fileToProcess, REL_FAILURE);
} else {
// This can happen if any exceptions occur while setting up the connection, statement, etc.
logger.error("The CQL query {} is invalid due to syntax error, authorization issue, or another " + "validation problem", new Object[] { selectQuery }, qve);
session.remove(fileToProcess);
context.yield();
}
} catch (final ProcessException e) {
if (context.hasIncomingConnection()) {
logger.error("Unable to execute CQL select query {} for {} due to {}; routing to failure", new Object[] { selectQuery, fileToProcess, e });
fileToProcess = session.penalize(fileToProcess);
session.transfer(fileToProcess, REL_FAILURE);
} else {
logger.error("Unable to execute CQL select query {} due to {}", new Object[] { selectQuery, e });
session.remove(fileToProcess);
context.yield();
}
}
}
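QueryCassandra awaits the asynchronous result with a bounded wait only when QUERY_TIMEOUT is positive, and otherwise blocks until the query completes. The sketch below mirrors that branch with a plain JDK Future as a stand-in for the Cassandra driver's ResultSetFuture; all names here are illustrative only.

import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class QueryTimeoutSketch {
    public static void main(final String[] args) {
        final ExecutorService executor = Executors.newSingleThreadExecutor();
        final Future<String> queryFuture = executor.submit(() -> "pretend result set"); // stand-in for executeAsync(selectQuery)
        final long queryTimeout = 5000L; // milliseconds; 0 would mean wait indefinitely
        try {
            final String resultSet = queryTimeout > 0
                    ? queryFuture.get(queryTimeout, TimeUnit.MILLISECONDS)
                    : queryFuture.get();
            System.out.println("Fetched: " + resultSet);
        } catch (final TimeoutException | InterruptedException | ExecutionException e) {
            throw new RuntimeException(e); // stands in for wrapping the failure in a ProcessException
        } finally {
            executor.shutdown();
        }
    }
}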