use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
the class PutSplunk method processDelimitedMessages.
/**
 * Streams the content of the FlowFile one byte at a time, cutting it into messages wherever the delimiter
 * occurs (and at the end of the stream). Every non-empty message is handed to the sender, and the byte range
 * it occupied in the FlowFile is recorded on the message batch as either successful or failed.
 */
private void processDelimitedMessages(final ProcessContext context, final ProcessSession session, final FlowFile flowFile, final ChannelSender sender, final String delimiter) {
    final String protocol = context.getProperty(PROTOCOL).getValue();
    final byte[] delimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);

    // Each byte read from the FlowFile is pushed into this circular buffer, which reports whether the
    // most recently added bytes match the delimiter.
    final NonThreadSafeCircularBuffer buffer = new NonThreadSafeCircularBuffer(delimiterBytes);

    final AtomicLong messagesSent = new AtomicLong(0L);
    final FlowFileMessageBatch messageBatch = new FlowFileMessageBatch(session, flowFile);
    activeBatches.add(messageBatch);

    try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(final InputStream rawIn) throws IOException {
                try (final InputStream bufferedIn = new BufferedInputStream(rawIn);
                        final ByteCountingInputStream in = new ByteCountingInputStream(bufferedIn)) {
                    long messageStartOffset = in.getBytesConsumed();
                    boolean endOfStream = false;

                    // keep going until the stream has been exhausted
                    do {
                        final int current = in.read();

                        // bytes of the message completed by this read, or null if no message ended here
                        final byte[] message;
                        if (current == -1) {
                            // no more data: whatever has been accumulated is the final message
                            message = getMessage(baos, baos.size(), protocol);
                            endOfStream = true;
                        } else {
                            baos.write(current);
                            if (buffer.addAndCompare((byte) current)) {
                                // the delimiter was just completed; leave its trailing 'delimiterBytes.length'
                                // bytes out of the message so the delimiter itself is not sent
                                message = getMessage(baos, baos.size() - delimiterBytes.length, protocol);
                            } else {
                                message = null;
                            }
                        }

                        if (message != null) {
                            final long messageEndOffset = in.getBytesConsumed();

                            // messages without any data are skipped rather than sent
                            if (message.length != 0) {
                                try {
                                    sender.send(message);
                                    messageBatch.addSuccessfulRange(messageStartOffset, messageEndOffset);
                                    messagesSent.incrementAndGet();
                                } catch (final IOException e) {
                                    messageBatch.addFailedRange(messageStartOffset, messageEndOffset, e);
                                }
                            }

                            // clear the accumulator and mark where the next message begins
                            baos.reset();
                            messageStartOffset = in.getBytesConsumed();
                        }
                    } while (!endOfStream);
                }
            }
        });

        messageBatch.setNumMessages(messagesSent.get());
    } catch (final IOException ioe) {
        // Only closing the ByteArrayOutputStream can raise this, and by then all of the work
        // has already been completed, so there is nothing left to handle here
    }
}
use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
the class SpringContextProcessor method extractMessage.
/**
 * Extracts contents of the {@link FlowFile} to byte array.
 *
 * @param flowFile the FlowFile whose content is read
 * @param processSession the session used to read the FlowFile content
 * @return a newly allocated array of {@code flowFile.getSize()} bytes filled from the FlowFile content
 * @throws IllegalStateException if the FlowFile content is larger than {@link Integer#MAX_VALUE} bytes
 */
private byte[] extractMessage(FlowFile flowFile, ProcessSession processSession) {
    final long contentSize = flowFile.getSize();
    // A bare (int) cast silently wraps for sizes above Integer.MAX_VALUE, producing either a negative
    // array length or a buffer that is too small to hold the content. Fail with a clear message instead.
    if (contentSize > Integer.MAX_VALUE) {
        throw new IllegalStateException("Content of " + flowFile + " is " + contentSize
                + " bytes, which is too large to be read into a single byte array");
    }
    final byte[] messageContent = new byte[(int) contentSize];
    processSession.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            // NOTE(review): the third argument is presumably the "ensure capacity" flag - confirm against StreamUtils
            StreamUtils.fillBuffer(in, messageContent, true);
        }
    });
    return messageContent;
}
use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
the class TestJmsConsumer method testMap2FlowFileBytesMessage.
/**
 * Checks that converting a JMS BytesMessage yields a FlowFile whose size and content
 * match the bytes that were written into the message.
 *
 * @throws java.lang.Exception ex
 */
@Test
public void testMap2FlowFileBytesMessage() throws Exception {
    final TestRunner runner = TestRunners.newTestRunner(GetJMSQueue.class);

    // Build a BytesMessage carrying a known UTF-8 payload and rewind it for reading
    final BytesMessage jmsMessage = new ActiveMQBytesMessage();
    final String expectedText = "Apache NiFi is an easy to use, powerful, and reliable system to process and distribute data.!";
    final byte[] expectedBytes = expectedText.getBytes("UTF-8");
    jmsMessage.writeBytes(expectedBytes);
    jmsMessage.reset();

    final ProcessContext context = runner.getProcessContext();
    final ProcessSession session = runner.getProcessSessionFactory().createSession();
    final ProcessorInitializationContext initContext =
            new MockProcessorInitializationContext(runner.getProcessor(), (MockProcessContext) runner.getProcessContext());

    // Convert the message and verify the resulting FlowFile has the payload's length
    final JmsProcessingSummary summary = JmsConsumer.map2FlowFile(context, session, jmsMessage, true, initContext.getLogger());
    assertEquals("BytesMessage content length should equal to FlowFile content size", expectedBytes.length, summary.getLastFlowFile().getSize());

    // Read the FlowFile content back and compare it with the original text
    final byte[] actualBytes = new byte[expectedBytes.length];
    runner.clearTransferState();
    session.read(summary.getLastFlowFile(), new InputStreamCallback() {
        @Override
        public void process(InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, actualBytes, false);
        }
    });
    final String actualText = new String(actualBytes, "UTF-8");
    assertEquals("", expectedText, actualText);
}
use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
the class PutHiveStreaming method onTrigger.
/**
 * Streams the Avro records of a single incoming FlowFile to Hive.
 * <p>
 * Only one thread may work on a given database/table at a time: a semaphore keyed by {@code dbName.tableName}
 * is acquired without waiting, and when it cannot be acquired the session is rolled back and the method returns.
 * Once acquired, the semaphore is always released and the thread's context class loader is always restored,
 * including when setup before the read fails or when transferring the resulting FlowFiles throws.
 * <p>
 * Each record's partition values are extracted and the record is written through a {@link HiveWriter}; the
 * writer is flushed whenever its total record count reaches the configured records-per-transaction limit, and
 * all writers are flushed and closed once the input has been consumed. Content that cannot be read as Avro is
 * routed to failure or raised as a {@link ProcessException}, depending on the adjusted error result.
 *
 * @param context the process context supplying the processor's property values
 * @param session the session used to read the incoming FlowFile and create the outgoing ones
 * @param functionContext holds the outgoing FlowFiles, their Avro writers and the record count for this invocation
 * @throws ProcessException if the content cannot be read as Avro and the adjusted error destination is
 *         {@code ProcessException}, or if a Hive transaction cannot be aborted after a failed flush
 */
private void onTrigger(ProcessContext context, ProcessSession session, FunctionContext functionContext) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final String dbName = context.getProperty(DB_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
    // Only allow one thread to work on a DB/table at a time
    final Semaphore newSemaphore = new Semaphore(1);
    Semaphore semaphore = tableSemaphoreMap.putIfAbsent(dbName + "." + tableName, newSemaphore);
    if (semaphore == null) {
        semaphore = newSemaphore;
    }
    boolean gotSemaphore = false;
    try {
        gotSemaphore = semaphore.tryAcquire(0, TimeUnit.SECONDS);
    } catch (InterruptedException ie) {
        // gotSemaphore stays false; restore the interrupt flag so the interruption is not lost
        Thread.currentThread().interrupt();
    }
    if (!gotSemaphore) {
        // We didn't get a chance to acquire, so rollback the session and try again next time
        session.rollback();
        return;
    }
    // Store the original class loader up front so that the outer finally can restore it no matter where a failure occurs
    final ClassLoader originalClassloader = Thread.currentThread().getContextClassLoader();
    try {
        final ComponentLog log = getLogger();
        final String metastoreUri = context.getProperty(METASTORE_URI).evaluateAttributeExpressions(flowFile).getValue();
        final boolean autoCreatePartitions = context.getProperty(AUTOCREATE_PARTITIONS).asBoolean();
        final Integer maxConnections = context.getProperty(MAX_OPEN_CONNECTIONS).asInteger();
        final Integer heartbeatInterval = context.getProperty(HEARTBEAT_INTERVAL).evaluateAttributeExpressions().asInteger();
        final Integer txnsPerBatch = context.getProperty(TXNS_PER_BATCH).evaluateAttributeExpressions(flowFile).asInteger();
        final Integer recordsPerTxn = context.getProperty(RECORDS_PER_TXN).evaluateAttributeExpressions(flowFile).asInteger();
        final Map<HiveEndPoint, HiveWriter> myWriters = new ConcurrentHashMap<>();
        HiveOptions o = new HiveOptions(metastoreUri, dbName, tableName).withTxnsPerBatch(txnsPerBatch).withAutoCreatePartitions(autoCreatePartitions).withMaxOpenConnections(maxConnections).withHeartBeatInterval(heartbeatInterval).withCallTimeout(callTimeout);
        if (SecurityUtil.isSecurityEnabled(hiveConfig)) {
            final String explicitPrincipal = context.getProperty(kerberosProperties.getKerberosPrincipal()).evaluateAttributeExpressions().getValue();
            final String explicitKeytab = context.getProperty(kerberosProperties.getKerberosKeytab()).evaluateAttributeExpressions().getValue();
            final KerberosCredentialsService credentialsService = context.getProperty(KERBEROS_CREDENTIALS_SERVICE).asControllerService(KerberosCredentialsService.class);
            final String resolvedPrincipal;
            final String resolvedKeytab;
            if (credentialsService == null) {
                resolvedPrincipal = explicitPrincipal;
                resolvedKeytab = explicitKeytab;
            } else {
                resolvedPrincipal = credentialsService.getPrincipal();
                resolvedKeytab = credentialsService.getKeytab();
            }
            o = o.withKerberosPrincipal(resolvedPrincipal).withKerberosKeytab(resolvedKeytab);
        }
        final HiveOptions options = o;
        // Explicitly set the context class loader to this class's classloader (for use by the Hive Metastore)
        Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
        final List<String> partitionColumnList;
        final String partitionColumns = context.getProperty(PARTITION_COLUMNS).evaluateAttributeExpressions().getValue();
        if (partitionColumns == null || partitionColumns.isEmpty()) {
            partitionColumnList = Collections.emptyList();
        } else {
            String[] partitionCols = partitionColumns.split(",");
            partitionColumnList = new ArrayList<>(partitionCols.length);
            for (String col : partitionCols) {
                partitionColumnList.add(col.trim());
            }
        }
        final AtomicReference<List<HiveStreamingRecord>> successfulRecords = new AtomicReference<>();
        successfulRecords.set(new ArrayList<>());
        final FlowFile inputFlowFile = flowFile;
        final RoutingResult result = new RoutingResult();
        final ExceptionHandler<FunctionContext> exceptionHandler = new ExceptionHandler<>();
        // Classify a thrown exception into an error type by rethrowing it and catching by category
        exceptionHandler.mapException(s -> {
            try {
                if (s == null) {
                    return ErrorTypes.PersistentFailure;
                }
                throw s;
            } catch (IllegalArgumentException | HiveWriter.WriteFailure | SerializationError inputError) {
                return ErrorTypes.InvalidInput;
            } catch (HiveWriter.CommitFailure | HiveWriter.TxnBatchFailure | HiveWriter.TxnFailure writerTxError) {
                return ErrorTypes.TemporalInputFailure;
            } catch (ConnectionError | HiveWriter.ConnectFailure connectionError) {
                // Can't connect to Hive endpoint.
                log.error("Error connecting to Hive endpoint: table {} at {}", new Object[] { options.getTableName(), options.getMetaStoreURI() });
                return ErrorTypes.TemporalFailure;
            } catch (IOException | InterruptedException tempError) {
                return ErrorTypes.TemporalFailure;
            } catch (Exception t) {
                return ErrorTypes.UnknownFailure;
            }
        });
        final BiFunction<FunctionContext, ErrorTypes, ErrorTypes.Result> adjustError = RollbackOnFailure.createAdjustError(getLogger());
        exceptionHandler.adjustError(adjustError);
        // Create output flow files and their Avro writers
        functionContext.setFlowFiles(session.create(inputFlowFile), session.create(inputFlowFile));
        // Register the writers only once the guarded section below is about to start, so that a failure
        // during the setup above cannot leave a stale entry in threadWriterList
        threadWriterList.add(myWriters);
        try {
            session.read(inputFlowFile, new InputStreamCallback() {
                @Override
                public void process(InputStream in) throws IOException {
                    try (final DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {
                        GenericRecord currRecord = null;
                        // Copy codec and schema information to all writers
                        final String codec = reader.getMetaString(DataFileConstants.CODEC) == null ? DataFileConstants.NULL_CODEC : reader.getMetaString(DataFileConstants.CODEC);
                        functionContext.initAvroWriters(session, codec, reader);
                        Runnable flushSuccessfulRecords = () -> {
                            // Now send the records to the successful FlowFile and update the success count
                            functionContext.appendRecordsToSuccess(session, successfulRecords.get());
                            // Clear the list of successful records, we'll use it at the end when we flush whatever records are left
                            successfulRecords.set(new ArrayList<>());
                        };
                        while (reader.hasNext()) {
                            // We can NOT reuse currRecord here, because currRecord is accumulated in successful records.
                            // If we use the same GenericRecord instance, every record ends up having the same contents.
                            // To avoid this, we need to create a brand new GenericRecord instance here each time.
                            currRecord = reader.next();
                            functionContext.recordCount.incrementAndGet();
                            // Extract the partition values (they must be put separately into the Hive Streaming API)
                            List<String> partitionValues = new ArrayList<>();
                            if (!exceptionHandler.execute(functionContext, currRecord, input -> {
                                for (String partition : partitionColumnList) {
                                    Object partitionValue = input.get(partition);
                                    if (partitionValue == null) {
                                        throw new IllegalArgumentException("Partition column '" + partition + "' not found in Avro record");
                                    }
                                    partitionValues.add(partitionValue.toString());
                                }
                            }, onRecordError(context, session, myWriters))) {
                                continue;
                            }
                            final HiveStreamingRecord record = new HiveStreamingRecord(partitionValues, currRecord);
                            final AtomicReference<HiveWriter> hiveWriterRef = new AtomicReference<>();
                            // Write record to Hive streaming
                            if (!exceptionHandler.execute(functionContext, record, input -> {
                                final HiveEndPoint endPoint = makeHiveEndPoint(record.getPartitionValues(), options);
                                final HiveWriter hiveWriter = getOrCreateWriter(myWriters, options, endPoint);
                                hiveWriterRef.set(hiveWriter);
                                hiveWriter.write(record.getRecord().toString().getBytes(StandardCharsets.UTF_8));
                                successfulRecords.get().add(record);
                            }, onHiveRecordError(context, session, myWriters))) {
                                continue;
                            }
                            // If we've reached the records-per-transaction limit, flush the Hive Writer and update the Avro Writer for successful records
                            final HiveWriter hiveWriter = hiveWriterRef.get();
                            if (hiveWriter.getTotalRecords() >= recordsPerTxn) {
                                exceptionHandler.execute(functionContext, successfulRecords.get(), input -> {
                                    hiveWriter.flush(true);
                                    // Proceed function context. Process session can't be rollback anymore.
                                    functionContext.proceed();
                                    // Now send the records to the success relationship and update the success count
                                    flushSuccessfulRecords.run();
                                }, onHiveRecordsError(context, session, myWriters).andThen((fc, input, res, commitException) -> {
                                    // Reset hiveWriter for succeeding records.
                                    switch(res.destination()) {
                                        case Retry:
                                        case Failure:
                                            try {
                                                // Abort current tx and move to next.
                                                hiveWriter.abort();
                                            } catch (Exception e) {
                                                // Can't even abort properly, throw a process exception
                                                throw new ProcessException(e);
                                            }
                                    }
                                }));
                            }
                        }
                        exceptionHandler.execute(functionContext, successfulRecords.get(), input -> {
                            // Finish any transactions
                            flushAllWriters(myWriters, true);
                            closeAllWriters(myWriters);
                            // Now send any remaining records to the success relationship and update the count
                            flushSuccessfulRecords.run();
                            // Append successfulRecords on failure.
                        }, onHiveRecordsError(context, session, myWriters));
                    } catch (IOException ioe) {
                        // The Avro file is invalid (or may not be an Avro file at all), send it to failure
                        final ErrorTypes.Result adjusted = adjustError.apply(functionContext, ErrorTypes.InvalidInput);
                        final String msg = "The incoming flow file can not be read as an Avro file";
                        switch(adjusted.destination()) {
                            case Failure:
                                log.error(msg, ioe);
                                result.routeTo(inputFlowFile, REL_FAILURE);
                                break;
                            case ProcessException:
                                throw new ProcessException(msg, ioe);
                        }
                    }
                }
            });
            // If we got here, we've processed the outgoing flow files correctly, so remove the incoming one if necessary
            if (result.getRoutedFlowFiles().values().stream().noneMatch(routed -> routed.contains(inputFlowFile))) {
                session.remove(inputFlowFile);
            }
        } catch (DiscontinuedException e) {
            // The input FlowFile processing is discontinued. Keep it in the input queue.
            getLogger().warn("Discontinued processing for {} due to {}", new Object[] { flowFile, e }, e);
            result.routeTo(flowFile, Relationship.SELF);
        } catch (ShouldRetryException e) {
            // This exception is already a result of adjusting an error, so simply transfer the FlowFile to retry.
            getLogger().error(e.getMessage(), e);
            flowFile = session.penalize(flowFile);
            result.routeTo(flowFile, REL_RETRY);
        } finally {
            threadWriterList.remove(myWriters);
            functionContext.transferFlowFiles(session, result, options);
        }
    } finally {
        // Runs even if setup or the FlowFile transfer above failed, so the table is never left locked
        // Restore original class loader, might not be necessary but is good practice since the processor task changed it
        Thread.currentThread().setContextClassLoader(originalClassloader);
        semaphore.release();
    }
}
use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
the class RouteHL7 method onTrigger.
/**
 * Parses the content of the incoming FlowFile as an HL7 message and evaluates every configured query against it.
 * For each matching query a clone of the FlowFile is tagged with the {@code RouteHL7.Route} attribute and
 * transferred to that query's relationship; the incoming FlowFile itself goes to {@code REL_ORIGINAL}.
 * Content that cannot be parsed as HL7, or that is too large to be buffered in a single byte array, is
 * routed to {@code REL_FAILURE}.
 *
 * @param context the process context supplying the character set property
 * @param session the session used to read, clone and transfer FlowFiles
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final Charset charset = Charset.forName(context.getProperty(CHARACTER_SET).evaluateAttributeExpressions(flowFile).getValue());
    final long contentSize = flowFile.getSize();
    // A bare (int) cast silently wraps for sizes above Integer.MAX_VALUE, which would give either a negative
    // array length or a buffer holding only part of the content. Treat such input as unparseable instead.
    if (contentSize > Integer.MAX_VALUE) {
        getLogger().error("Failed to parse {} as HL7 because its content of {} bytes is too large to buffer in memory; routing to failure", new Object[] { flowFile, contentSize });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final byte[] buffer = new byte[(int) contentSize];
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, buffer);
        }
    });
    // The context is intentionally left unclosed here, hence the suppressed resource warning
    @SuppressWarnings("resource") final HapiContext hapiContext = new DefaultHapiContext();
    hapiContext.setValidationContext((ca.uhn.hl7v2.validation.ValidationContext) ValidationContextFactory.noValidation());
    final PipeParser parser = hapiContext.getPipeParser();
    final String hl7Text = new String(buffer, charset);
    final HL7Message message;
    try {
        final Message hapiMessage = parser.parse(hl7Text);
        message = new HapiMessage(hapiMessage);
    } catch (final Exception e) {
        getLogger().error("Failed to parse {} as HL7 due to {}; routing to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    // Names of the relationships whose query matched, collected for the summary log line
    final Set<String> matchingRels = new HashSet<>();
    // Read the queries field once so the loop iterates over a single map reference
    final Map<Relationship, HL7Query> queryMap = queries;
    for (final Map.Entry<Relationship, HL7Query> entry : queryMap.entrySet()) {
        final Relationship relationship = entry.getKey();
        final HL7Query query = entry.getValue();
        final QueryResult result = query.evaluate(message);
        if (result.isMatch()) {
            FlowFile clone = session.clone(flowFile);
            clone = session.putAttribute(clone, "RouteHL7.Route", relationship.getName());
            session.transfer(clone, relationship);
            session.getProvenanceReporter().route(clone, relationship);
            matchingRels.add(relationship.getName());
        }
    }
    session.transfer(flowFile, REL_ORIGINAL);
    getLogger().info("Routed a copy of {} to {} relationships: {}", new Object[] { flowFile, matchingRels.size(), matchingRels });
}
Aggregations