use of org.apache.nifi.serialization.WriteResult in project nifi by apache.
the class ListenTCPRecord method onTrigger.
/**
 * Reads up to one batch of records from the next available socket connection and writes them to a new FlowFile.
 * <p>
 * A connection is obtained from {@code pollForSocketRecordReader()}; when none is available the method returns
 * immediately. A connection that is already closed is closed again quietly (so the connection count is decremented)
 * and skipped. Otherwise at most {@code RECORD_BATCH_SIZE} records are read and written. If at least one record was
 * written, the FlowFile is given the writer's attributes plus {@code mime.type}, {@code tcp.sender},
 * {@code tcp.port} and {@code record.count}, a provenance RECEIVE event is reported, and the FlowFile is transferred
 * to {@code REL_SUCCESS}; if nothing was written the FlowFile is removed. In both cases the connection is offered
 * back to {@code socketReaders}.
 * <p>
 * A socket timeout, whether thrown directly or wrapped by the record reader, is not treated as an error: the
 * connection is kept and whatever has been read so far is kept as well.
 *
 * @param context provides the batch size, the reader error handling strategy and the record writer factory
 * @param session the session used to create, write, transfer or remove the FlowFile
 * @throws ProcessException declared by the overridden method; failures while reading or writing records are caught
 *         and logged inside this method, the connection is closed and the FlowFile is removed
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final SocketChannelRecordReader socketRecordReader = pollForSocketRecordReader();
    if (socketRecordReader == null) {
        return;
    }

    if (socketRecordReader.isClosed()) {
        getLogger().warn("Unable to read records from {}, socket already closed", new Object[] { getRemoteAddress(socketRecordReader) });
        // still need to call close so the overall count is decremented
        IOUtils.closeQuietly(socketRecordReader);
        return;
    }

    final int recordBatchSize = context.getProperty(RECORD_BATCH_SIZE).asInteger();
    final String readerErrorHandling = context.getProperty(READER_ERROR_HANDLING_STRATEGY).getValue();
    final RecordSetWriterFactory recordSetWriterFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);

    // synchronize to ensure there are no stale values in the underlying SocketChannel
    synchronized (socketRecordReader) {
        FlowFile flowFile = session.create();
        try {
            // lazily creating the record reader here b/c we need a flow file, eventually shouldn't have to do this
            RecordReader recordReader = socketRecordReader.getRecordReader();
            if (recordReader == null) {
                recordReader = socketRecordReader.createRecordReader(flowFile, getLogger());
            }

            Record record;
            try {
                record = recordReader.nextRecord();
            } catch (final Exception e) {
                if (isSocketTimeout(e)) {
                    // nothing has been read yet, so keep the connection and discard the empty flow file
                    getLogger().debug("Timeout reading records, will try again later", e);
                    socketReaders.offer(socketRecordReader);
                    session.remove(flowFile);
                    return;
                }
                throw e;
            }

            if (record == null) {
                getLogger().debug("No records available from {}, closing connection", new Object[] { getRemoteAddress(socketRecordReader) });
                IOUtils.closeQuietly(socketRecordReader);
                session.remove(flowFile);
                return;
            }

            String mimeType = null;
            WriteResult writeResult = null;

            // typed empty map instead of the raw Collections.EMPTY_MAP constant to avoid an unchecked conversion
            final RecordSchema recordSchema = recordSetWriterFactory.getSchema(Collections.emptyMap(), record.getSchema());

            try (final OutputStream out = session.write(flowFile);
                 final RecordSetWriter recordWriter = recordSetWriterFactory.createWriter(getLogger(), recordSchema, out)) {

                // start the record set and write the first record from above
                recordWriter.beginRecordSet();
                writeResult = recordWriter.write(record);

                while (record != null && writeResult.getRecordCount() < recordBatchSize) {
                    // handle a read failure according to the selected strategy:
                    // - a timeout (direct or wrapped) simply ends this batch, keeping what was written so far
                    // - if discarding, bounce to the outer catch block, which closes the connection and removes the flow file
                    // - if keeping, null out the record to break out of the loop, which transfers what we have
                    try {
                        record = recordReader.nextRecord();
                    } catch (final Exception e) {
                        if (isSocketTimeout(e)) {
                            getLogger().debug("Timeout reading records, will try again later", e);
                            break;
                        }
                        if (ERROR_HANDLING_DISCARD.getValue().equals(readerErrorHandling)) {
                            throw e;
                        }
                        record = null;
                    }

                    if (record != null) {
                        writeResult = recordWriter.write(record);
                    }
                }

                writeResult = recordWriter.finishRecordSet();
                recordWriter.flush();
                mimeType = recordWriter.getMimeType();
            }

            // if we didn't write any records then we need to remove the flow file
            if (writeResult.getRecordCount() <= 0) {
                getLogger().debug("Removing flow file, no records were written");
                session.remove(flowFile);
            } else {
                final String sender = getRemoteAddress(socketRecordReader);

                final Map<String, String> attributes = new HashMap<>(writeResult.getAttributes());
                attributes.put(CoreAttributes.MIME_TYPE.key(), mimeType);
                attributes.put("tcp.sender", sender);
                attributes.put("tcp.port", String.valueOf(port));
                attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
                flowFile = session.putAllAttributes(flowFile, attributes);

                // drop a single leading '/' from the remote address before building the transit URI
                final String senderHost = sender.startsWith("/") && sender.length() > 1 ? sender.substring(1) : sender;
                final String transitUri = "tcp://" + senderHost + ":" + port;
                session.getProvenanceReporter().receive(flowFile, transitUri);

                session.transfer(flowFile, REL_SUCCESS);
            }

            getLogger().debug("Re-queuing connection for further processing...");
            socketReaders.offer(socketRecordReader);

        } catch (Exception e) {
            getLogger().error("Error processing records: " + e.getMessage(), e);
            IOUtils.closeQuietly(socketRecordReader);
            session.remove(flowFile);
            return;
        }
    }
}

/**
 * Determines whether the given throwable, or any throwable in its cause chain, is a {@link SocketTimeoutException}.
 * Some record libraries wrap the real exception in a RuntimeException, so every cause is inspected.
 *
 * @param throwable the failure to inspect, may be {@code null}
 * @return {@code true} if a SocketTimeoutException is found anywhere in the cause chain
 */
private static boolean isSocketTimeout(final Throwable throwable) {
    Throwable cause = throwable;
    while (cause != null) {
        if (cause instanceof SocketTimeoutException) {
            return true;
        }
        cause = cause.getCause();
    }
    return false;
}
use of org.apache.nifi.serialization.WriteResult in project nifi by apache.
the class ListenUDPRecord method onTrigger.
/**
 * Drains up to {@code BATCH_SIZE} datagrams, parses each one into records and appends the records to one FlowFile
 * per sender.
 * <p>
 * Each datagram is first read completely into memory; if reading throws, or yields no records, the event is handed
 * to {@code handleParseFailure} and the next datagram is processed. The first datagram of a sender creates a FlowFile
 * and a record writer whose schema is derived from that datagram's first record. After the batch, every writer is
 * finished and closed; FlowFiles with at least one record receive the sender attributes, the writer attributes,
 * {@code mime.type} and the record count, are transferred to {@code REL_SUCCESS} and get a provenance RECEIVE event.
 *
 * @param context provides the batch size and the record reader/writer factories
 * @param session the session used to obtain events and to create, write, transfer or remove FlowFiles
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final int maxBatchSize = context.getProperty(BATCH_SIZE).asInteger();
    final Map<String, FlowFileRecordWriter> flowFileRecordWriters = new HashMap<>();

    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);

    for (int i = 0; i < maxBatchSize; i++) {
        // this processor isn't leveraging the error queue so don't bother polling to avoid the overhead
        // if the error handling is ever changed to use the error queue then this flag needs to be changed as well
        final StandardEvent event = getMessage(true, false, session);

        // break out if we don't have any messages, don't yield since we already do a long poll inside getMessage
        if (event == null) {
            break;
        }

        // attempt to read all of the records from the current datagram into a list in memory so that we can ensure the
        // entire datagram can be read as records, and if not hand the whole thing to handleParseFailure
        final List<Record> records = new ArrayList<>();
        RecordReader reader = null;
        try (final InputStream in = new ByteArrayInputStream(event.getData())) {
            reader = readerFactory.createRecordReader(Collections.emptyMap(), in, getLogger());

            Record record;
            while ((record = reader.nextRecord()) != null) {
                records.add(record);
            }
        } catch (final Exception e) {
            handleParseFailure(event, session, e);
            continue;
        } finally {
            // the reader is a resource of its own; close it quietly so that a failure on close cannot turn a
            // successfully parsed datagram into a parse failure
            if (reader != null) {
                IOUtils.closeQuietly(reader);
            }
        }

        if (records.isEmpty()) {
            handleParseFailure(event, session, null);
            continue;
        }

        // see if we already started a flow file and writer for the given sender
        FlowFileRecordWriter flowFileRecordWriter = flowFileRecordWriters.get(event.getSender());

        if (flowFileRecordWriter == null) {
            FlowFile flowFile = null;
            OutputStream rawOut = null;
            RecordSetWriter writer = null;
            try {
                flowFile = session.create();
                rawOut = session.write(flowFile);

                final Record firstRecord = records.get(0);
                final RecordSchema recordSchema = firstRecord.getSchema();
                final RecordSchema writeSchema = writerFactory.getSchema(Collections.emptyMap(), recordSchema);

                writer = writerFactory.createWriter(getLogger(), writeSchema, rawOut);
                writer.beginRecordSet();

                flowFileRecordWriter = new FlowFileRecordWriter(flowFile, writer);
                flowFileRecordWriters.put(event.getSender(), flowFileRecordWriter);
            } catch (final Exception ex) {
                // NOTE(review): the message says the datagram will be re-queued, but no re-queue is visible in this
                // method - confirm whether the event is actually retried elsewhere
                getLogger().error("Failed to properly initialize record writer. Datagram will be queued for re-processing.", ex);
                try {
                    if (writer != null) {
                        writer.close();
                    }
                } catch (final Exception e) {
                    getLogger().warn("Failed to close Record Writer", e);
                }

                if (rawOut != null) {
                    IOUtils.closeQuietly(rawOut);
                }

                if (flowFile != null) {
                    session.remove(flowFile);
                }

                context.yield();
                break;
            }
        }

        // attempt to write each record, if any record fails then remove the flow file and break out of the loop
        final RecordSetWriter writer = flowFileRecordWriter.getRecordWriter();
        try {
            for (final Record record : records) {
                writer.write(record);
            }
        } catch (Exception e) {
            getLogger().error("Failed to write records due to: " + e.getMessage(), e);
            IOUtils.closeQuietly(writer);
            session.remove(flowFileRecordWriter.getFlowFile());
            flowFileRecordWriters.remove(event.getSender());
            break;
        }
    }

    // finish every record set that is still open and route the resulting flow files
    for (final Map.Entry<String, FlowFileRecordWriter> entry : flowFileRecordWriters.entrySet()) {
        final String sender = entry.getKey();
        final FlowFileRecordWriter flowFileRecordWriter = entry.getValue();
        final RecordSetWriter writer = flowFileRecordWriter.getRecordWriter();

        FlowFile flowFile = flowFileRecordWriter.getFlowFile();
        try {
            final WriteResult writeResult;
            try {
                writeResult = writer.finishRecordSet();
            } finally {
                // always release the writer, even if finishing the record set failed
                writer.close();
            }

            if (writeResult.getRecordCount() == 0) {
                session.remove(flowFile);
                continue;
            }

            final Map<String, String> attributes = new HashMap<>();
            attributes.putAll(getAttributes(sender));
            attributes.putAll(writeResult.getAttributes());
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.getRecordCount()));

            flowFile = session.putAllAttributes(flowFile, attributes);
            session.transfer(flowFile, REL_SUCCESS);

            final String transitUri = getTransitUri(sender);
            session.getProvenanceReporter().receive(flowFile, transitUri);
        } catch (final Exception e) {
            getLogger().error("Unable to properly complete record set due to: " + e.getMessage(), e);
            session.remove(flowFile);
        }
    }
}
use of org.apache.nifi.serialization.WriteResult in project nifi by apache.
the class PartitionRecord method onTrigger.
/**
 * Splits the records of the incoming FlowFile into child FlowFiles so that all records in a child share the same
 * values for every configured RecordPath.
 * <p>
 * One RecordPath is compiled per dynamic property. For every record the paths are evaluated and the resulting values
 * form the partition key; each distinct key gets its own child FlowFile and record writer. When all records have been
 * written, every child receives the partition attributes, the writer attributes, {@code record.count} and
 * {@code mime.type}. Children are transferred to {@code REL_SUCCESS} and the input to {@code REL_ORIGINAL}. On any
 * failure all children are removed and the input is transferred to {@code REL_FAILURE}.
 *
 * @param context provides the reader/writer factories and the dynamic RecordPath properties
 * @param session the session used to read the input and create, write, transfer or remove FlowFiles
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);

    final Map<String, RecordPath> recordPaths;
    try {
        recordPaths = context.getProperties().keySet().stream()
            .filter(prop -> prop.isDynamic())
            .collect(Collectors.toMap(prop -> prop.getName(), prop -> getRecordPath(context, prop, flowFile)));
    } catch (final Exception e) {
        getLogger().error("Failed to compile RecordPath for {}; routing to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    final Map<RecordValueMap, RecordSetWriter> writerMap = new HashMap<>();
    final Map<String, String> originalAttributes = flowFile.getAttributes();

    // the reader is declared as a resource so that it is closed together with the input stream
    try (final InputStream in = session.read(flowFile);
         final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {

        final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());

        Record record;
        while ((record = reader.nextRecord()) != null) {
            final Map<String, List<ValueWrapper>> recordMap = new HashMap<>();

            // Evaluate all of the RecordPath's for this Record
            for (final Map.Entry<String, RecordPath> entry : recordPaths.entrySet()) {
                final String propName = entry.getKey();
                final RecordPath recordPath = entry.getValue();

                final Stream<FieldValue> fieldValueStream = recordPath.evaluate(record).getSelectedFields();
                final List<ValueWrapper> fieldValues = fieldValueStream
                    .map(fieldVal -> new ValueWrapper(fieldVal.getValue()))
                    .collect(Collectors.toList());
                recordMap.put(propName, fieldValues);
            }

            final RecordValueMap recordValueMap = new RecordValueMap(recordMap);

            // Get the RecordSetWriter that contains the same values for all RecordPaths - or create one if none exists.
            RecordSetWriter writer = writerMap.get(recordValueMap);
            if (writer == null) {
                final FlowFile childFlowFile = session.create(flowFile);
                recordValueMap.setFlowFile(childFlowFile);

                final OutputStream out = session.write(childFlowFile);
                try {
                    writer = writerFactory.createWriter(getLogger(), writeSchema, out);
                    writer.beginRecordSet();
                } catch (final Exception initFailure) {
                    // The child is not in writerMap yet, so the failure handler below would never see it.
                    // Release the stream and remove the child here before handing the failure on.
                    try {
                        if (writer != null) {
                            writer.close();
                        } else {
                            out.close();
                        }
                    } catch (final IOException closeFailure) {
                        initFailure.addSuppressed(closeFailure);
                    }
                    session.remove(childFlowFile);
                    throw initFailure;
                }

                writerMap.put(recordValueMap, writer);
            }

            writer.write(record);
        }

        // For each RecordSetWriter, finish the record set and close the writer.
        for (final Map.Entry<RecordValueMap, RecordSetWriter> entry : writerMap.entrySet()) {
            final RecordValueMap valueMap = entry.getKey();
            final RecordSetWriter writer = entry.getValue();

            final WriteResult writeResult = writer.finishRecordSet();
            writer.close();

            final Map<String, String> attributes = new HashMap<>();
            attributes.putAll(valueMap.getAttributes());
            attributes.putAll(writeResult.getAttributes());
            attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());

            FlowFile childFlowFile = valueMap.getFlowFile();
            childFlowFile = session.putAllAttributes(childFlowFile, attributes);

            session.adjustCounter("Record Processed", writeResult.getRecordCount(), false);
        }
    } catch (final Exception e) {
        // close every writer and drop every child created so far, then route the input to failure
        for (final Map.Entry<RecordValueMap, RecordSetWriter> entry : writerMap.entrySet()) {
            final RecordValueMap valueMap = entry.getKey();
            final RecordSetWriter writer = entry.getValue();

            try {
                writer.close();
            } catch (final IOException e1) {
                getLogger().warn("Failed to close Record Writer for {}; some resources may not be cleaned up appropriately", new Object[] { flowFile, e1 });
            }

            session.remove(valueMap.getFlowFile());
        }

        getLogger().error("Failed to partition {}", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    // Transfer the child FlowFiles only now, after all records have been processed successfully,
    // because we want to ensure that we are able to remove the child flowfiles in case of a failure.
    for (final RecordValueMap valueMap : writerMap.keySet()) {
        session.transfer(valueMap.getFlowFile(), REL_SUCCESS);
    }

    session.transfer(flowFile, REL_ORIGINAL);
}
use of org.apache.nifi.serialization.WriteResult in project nifi by apache.
the class PublisherLease method publish.
/**
 * Publishes every record of the given record set as an individual message to the given topic.
 * <p>
 * Each record is serialized on its own with a writer created from {@code writerFactory}; the attributes returned by
 * the writer are passed along with the message. Publishing stops as soon as the tracker reports the FlowFile as
 * failed. A record set without any records is reported to the tracker as empty.
 *
 * @param flowFile the FlowFile the records belong to, used for tracking
 * @param recordSet the records to publish
 * @param writerFactory creates the writer used to serialize each record
 * @param schema the schema handed to the writer
 * @param messageKeyField name of the record field whose string value becomes the message key, or {@code null} for
 *        no key
 * @param topic the destination topic
 * @throws IOException if reading or serializing fails; a SchemaNotFoundException is wrapped in an IOException
 */
void publish(final FlowFile flowFile, final RecordSet recordSet, final RecordSetWriterFactory writerFactory, final RecordSchema schema, final String messageKeyField, final String topic) throws IOException {
    if (tracker == null) {
        tracker = new InFlightMessageTracker(logger);
    }

    // a single buffer is reused for all records and reset before each one
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream(1024);
    int published = 0;

    try {
        for (Record current = recordSet.next(); current != null; current = recordSet.next()) {
            published++;
            buffer.reset();

            Map<String, String> writerAttributes = Collections.emptyMap();
            try (final RecordSetWriter recordWriter = writerFactory.createWriter(logger, schema, buffer)) {
                writerAttributes = recordWriter.write(current).getAttributes();
                recordWriter.flush();
            }

            final byte[] payload = buffer.toByteArray();

            // the key stays null when no key field is configured or the record has no value for it
            byte[] keyBytes = null;
            if (messageKeyField != null) {
                final String keyText = current.getAsString(messageKeyField);
                if (keyText != null) {
                    keyBytes = keyText.getBytes(StandardCharsets.UTF_8);
                }
            }

            publish(flowFile, writerAttributes, keyBytes, payload, topic, tracker);

            if (tracker.isFailed(flowFile)) {
                // If we have a failure, don't try to send anything else.
                return;
            }
        }

        if (published == 0) {
            tracker.trackEmpty(flowFile);
        }
    } catch (final TokenTooLargeException ttle) {
        tracker.fail(flowFile, ttle);
    } catch (final SchemaNotFoundException snfe) {
        throw new IOException(snfe);
    } catch (final Exception e) {
        // any other failure marks the FlowFile as failed and poisons this lease before propagating
        tracker.fail(flowFile, e);
        poison();
        throw e;
    }
}
use of org.apache.nifi.serialization.WriteResult in project nifi by apache.
the class QueryRecord method onTrigger.
/**
 * Runs the SQL statement of every dynamic property against the incoming FlowFile and writes each result set to a
 * child FlowFile.
 * <p>
 * For each dynamic property a child of the input is created, the property's SQL is evaluated (with attribute
 * expressions resolved against the input) and the result set is written to the child through a record writer. A
 * child with zero records is removed unless INCLUDE_ZERO_RECORD_FLOWFILES is enabled; otherwise it receives the
 * writer attributes, the MIME type and "record.count" and is routed to the relationship named after the property.
 * When all queries succeed the input goes to REL_ORIGINAL; on any exception all created children are removed and the
 * input goes to REL_FAILURE.
 *
 * @param context provides the reader/writer factories, the dynamic SQL properties and the processor settings
 * @param session the session used to read the input and create, write, transfer or remove FlowFiles
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
final FlowFile original = session.get();
if (original == null) {
return;
}
final StopWatch stopWatch = new StopWatch(true);
final RecordSetWriterFactory recordSetWriterFactory = context.getProperty(RECORD_WRITER_FACTORY).asControllerService(RecordSetWriterFactory.class);
final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);
// children that were completed successfully, mapped to the relationship they will be routed to
final Map<FlowFile, Relationship> transformedFlowFiles = new HashMap<>();
// every child that still exists in the session; used to clean up if a later step fails
final Set<FlowFile> createdFlowFiles = new HashSet<>();
// Determine the Record Reader's schema
// (the value kept is what the writer factory returns for the reader's schema and the input's attributes)
// NOTE(review): the RecordReader created here is not closed explicitly; only rawIn is closed by the try-with-resources
final RecordSchema readerSchema;
try (final InputStream rawIn = session.read(original)) {
final Map<String, String> originalAttributes = original.getAttributes();
final RecordReader reader = recordReaderFactory.createRecordReader(originalAttributes, rawIn, getLogger());
final RecordSchema inputSchema = reader.getSchema();
readerSchema = recordSetWriterFactory.getSchema(originalAttributes, inputSchema);
} catch (final Exception e) {
getLogger().error("Failed to determine Record Schema from {}; routing to failure", new Object[] { original, e });
session.transfer(original, REL_FAILURE);
return;
}
// Attributes of the input, used below when asking the writer factory for the schema to write with
final Map<String, String> originalAttributes = original.getAttributes();
// largest "records read" value reported by any of the queries
int recordsRead = 0;
try {
for (final PropertyDescriptor descriptor : context.getProperties().keySet()) {
// only dynamic properties carry SQL statements
if (!descriptor.isDynamic()) {
continue;
}
final Relationship relationship = new Relationship.Builder().name(descriptor.getName()).build();
// We have to fork a child because we may need to read the input FlowFile more than once,
// and we cannot call session.read() on the original FlowFile while we are within a write
// callback for the original FlowFile.
FlowFile transformed = session.create(original);
boolean flowFileRemoved = false;
try {
final String sql = context.getProperty(descriptor).evaluateAttributeExpressions(original).getValue();
// holders that let the write callback hand its results back to this method
final AtomicReference<WriteResult> writeResultRef = new AtomicReference<>();
final QueryResult queryResult;
if (context.getProperty(CACHE_SCHEMA).asBoolean()) {
queryResult = queryWithCache(session, original, sql, context, recordReaderFactory);
} else {
queryResult = query(session, original, sql, context, recordReaderFactory);
}
final AtomicReference<String> mimeTypeRef = new AtomicReference<>();
try {
final ResultSet rs = queryResult.getResultSet();
transformed = session.write(transformed, new OutputStreamCallback() {
@Override
public void process(final OutputStream out) throws IOException {
final ResultSetRecordSet recordSet;
final RecordSchema writeSchema;
try {
recordSet = new ResultSetRecordSet(rs, readerSchema);
final RecordSchema resultSetSchema = recordSet.getSchema();
writeSchema = recordSetWriterFactory.getSchema(originalAttributes, resultSetSchema);
} catch (final SQLException | SchemaNotFoundException e) {
throw new ProcessException(e);
}
// any failure while writing is wrapped in an IOException, the only checked exception the callback may throw
try (final RecordSetWriter resultSetWriter = recordSetWriterFactory.createWriter(getLogger(), writeSchema, out)) {
writeResultRef.set(resultSetWriter.write(recordSet));
mimeTypeRef.set(resultSetWriter.getMimeType());
} catch (final Exception e) {
throw new IOException(e);
}
}
});
} finally {
// release the query result whether or not writing succeeded
closeQuietly(queryResult);
}
recordsRead = Math.max(recordsRead, queryResult.getRecordsRead());
final WriteResult result = writeResultRef.get();
if (result.getRecordCount() == 0 && !context.getProperty(INCLUDE_ZERO_RECORD_FLOWFILES).asBoolean()) {
// empty result and empty FlowFiles are not wanted: drop the child
session.remove(transformed);
flowFileRemoved = true;
transformedFlowFiles.remove(transformed);
getLogger().info("Transformed {} but the result contained no data so will not pass on a FlowFile", new Object[] { original });
} else {
final Map<String, String> attributesToAdd = new HashMap<>();
if (result.getAttributes() != null) {
attributesToAdd.putAll(result.getAttributes());
}
attributesToAdd.put(CoreAttributes.MIME_TYPE.key(), mimeTypeRef.get());
attributesToAdd.put("record.count", String.valueOf(result.getRecordCount()));
transformed = session.putAllAttributes(transformed, attributesToAdd);
transformedFlowFiles.put(transformed, relationship);
session.adjustCounter("Records Written", result.getRecordCount(), false);
}
} finally {
// Ensure that we have the FlowFile in the set in case we throw any Exception
if (!flowFileRemoved) {
createdFlowFiles.add(transformed);
}
}
}
// all queries succeeded: report provenance and route the children, then the input
final long elapsedMillis = stopWatch.getElapsed(TimeUnit.MILLISECONDS);
if (transformedFlowFiles.size() > 0) {
session.getProvenanceReporter().fork(original, transformedFlowFiles.keySet(), elapsedMillis);
for (final Map.Entry<FlowFile, Relationship> entry : transformedFlowFiles.entrySet()) {
final FlowFile transformed = entry.getKey();
final Relationship relationship = entry.getValue();
session.getProvenanceReporter().route(transformed, relationship);
session.transfer(transformed, relationship);
}
}
getLogger().info("Successfully queried {} in {} millis", new Object[] { original, elapsedMillis });
session.transfer(original, REL_ORIGINAL);
} catch (final SQLException e) {
// for SQL failures the underlying cause, when present, is logged instead of the SQLException itself
getLogger().error("Unable to query {} due to {}", new Object[] { original, e.getCause() == null ? e : e.getCause() });
session.remove(createdFlowFiles);
session.transfer(original, REL_FAILURE);
} catch (final Exception e) {
getLogger().error("Unable to query {} due to {}", new Object[] { original, e });
session.remove(createdFlowFiles);
session.transfer(original, REL_FAILURE);
}
// the counter is adjusted on both the success and the failure path
session.adjustCounter("Records Read", recordsRead, false);
}
Aggregations