Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.
The class ListenUDPRecord, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final int maxBatchSize = context.getProperty(BATCH_SIZE).asInteger();
    final Map<String, FlowFileRecordWriter> flowFileRecordWriters = new HashMap<>();
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    for (int i = 0; i < maxBatchSize; i++) {
        // this processor isn't leveraging the error queue so don't bother polling to avoid the overhead
        // if the error handling is ever changed to use the error queue then this flag needs to be changed as well
        final StandardEvent event = getMessage(true, false, session);
        // break out if we don't have any messages, don't yield since we already do a long poll inside getMessage
        if (event == null) {
            break;
        }
        // attempt to read all of the records from the current datagram into a list in memory so that we can ensure the
        // entire datagram can be read as records, and if not transfer the whole thing to parse.failure
        final RecordReader reader;
        final List<Record> records = new ArrayList<>();
        try (final InputStream in = new ByteArrayInputStream(event.getData())) {
            reader = readerFactory.createRecordReader(Collections.emptyMap(), in, getLogger());
            Record record;
            while ((record = reader.nextRecord()) != null) {
                records.add(record);
            }
        } catch (final Exception e) {
            handleParseFailure(event, session, e);
            continue;
        }
        if (records.size() == 0) {
            handleParseFailure(event, session, null);
            continue;
        }
        // see if we already started a flow file and writer for the given sender
        // if an exception happens creating the flow file or writer, put the event in the error queue to try it again later
        FlowFileRecordWriter flowFileRecordWriter = flowFileRecordWriters.get(event.getSender());
        if (flowFileRecordWriter == null) {
            FlowFile flowFile = null;
            OutputStream rawOut = null;
            RecordSetWriter writer = null;
            try {
                flowFile = session.create();
                rawOut = session.write(flowFile);
                final Record firstRecord = records.get(0);
                final RecordSchema recordSchema = firstRecord.getSchema();
                final RecordSchema writeSchema = writerFactory.getSchema(Collections.emptyMap(), recordSchema);
                writer = writerFactory.createWriter(getLogger(), writeSchema, rawOut);
                writer.beginRecordSet();
                flowFileRecordWriter = new FlowFileRecordWriter(flowFile, writer);
                flowFileRecordWriters.put(event.getSender(), flowFileRecordWriter);
            } catch (final Exception ex) {
                getLogger().error("Failed to properly initialize record writer. Datagram will be queued for re-processing.", ex);
                try {
                    if (writer != null) {
                        writer.close();
                    }
                } catch (final Exception e) {
                    getLogger().warn("Failed to close Record Writer", e);
                }
                if (rawOut != null) {
                    IOUtils.closeQuietly(rawOut);
                }
                if (flowFile != null) {
                    session.remove(flowFile);
                }
                context.yield();
                break;
            }
        }
        // attempt to write each record, if any record fails then remove the flow file and break out of the loop
        final RecordSetWriter writer = flowFileRecordWriter.getRecordWriter();
        try {
            for (final Record record : records) {
                writer.write(record);
            }
        } catch (Exception e) {
            getLogger().error("Failed to write records due to: " + e.getMessage(), e);
            IOUtils.closeQuietly(writer);
            session.remove(flowFileRecordWriter.getFlowFile());
            flowFileRecordWriters.remove(event.getSender());
            break;
        }
    }
    for (final Map.Entry<String, FlowFileRecordWriter> entry : flowFileRecordWriters.entrySet()) {
        final String sender = entry.getKey();
        final FlowFileRecordWriter flowFileRecordWriter = entry.getValue();
        final RecordSetWriter writer = flowFileRecordWriter.getRecordWriter();
        FlowFile flowFile = flowFileRecordWriter.getFlowFile();
        try {
            final WriteResult writeResult;
            try {
                writeResult = writer.finishRecordSet();
            } finally {
                writer.close();
            }
            if (writeResult.getRecordCount() == 0) {
                session.remove(flowFile);
                continue;
            }
            final Map<String, String> attributes = new HashMap<>();
            attributes.putAll(getAttributes(sender));
            attributes.putAll(writeResult.getAttributes());
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.getRecordCount()));
            flowFile = session.putAllAttributes(flowFile, attributes);
            session.transfer(flowFile, REL_SUCCESS);
            final String transitUri = getTransitUri(sender);
            session.getProvenanceReporter().receive(flowFile, transitUri);
        } catch (final Exception e) {
            getLogger().error("Unable to properly complete record set due to: " + e.getMessage(), e);
            session.remove(flowFile);
        }
    }
}
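The FlowFileRecordWriter type used above is not included in this snippet. It is essentially a small per-sender holder that pairs the in-progress FlowFile with the RecordSetWriter that is filling it. A minimal sketch of such a holder, assuming it is a private static inner class of ListenUDPRecord (the real class may differ in detail):

    // Sketch of a holder pairing a FlowFile with its RecordSetWriter (hypothetical reconstruction).
    private static class FlowFileRecordWriter {

        private final FlowFile flowFile;
        private final RecordSetWriter recordWriter;

        public FlowFileRecordWriter(final FlowFile flowFile, final RecordSetWriter recordWriter) {
            this.flowFile = flowFile;
            this.recordWriter = recordWriter;
        }

        public FlowFile getFlowFile() {
            return flowFile;
        }

        public RecordSetWriter getRecordWriter() {
            return recordWriter;
        }
    }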
Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.
The class PartitionRecord, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final Map<String, RecordPath> recordPaths;
    try {
        recordPaths = context.getProperties().keySet().stream().filter(prop -> prop.isDynamic()).collect(Collectors.toMap(prop -> prop.getName(), prop -> getRecordPath(context, prop, flowFile)));
    } catch (final Exception e) {
        getLogger().error("Failed to compile RecordPath for {}; routing to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final Map<RecordValueMap, RecordSetWriter> writerMap = new HashMap<>();
    try (final InputStream in = session.read(flowFile)) {
        final Map<String, String> originalAttributes = flowFile.getAttributes();
        final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger());
        final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());
        Record record;
        while ((record = reader.nextRecord()) != null) {
            final Map<String, List<ValueWrapper>> recordMap = new HashMap<>();
            // Evaluate all of the RecordPaths for this Record
            for (final Map.Entry<String, RecordPath> entry : recordPaths.entrySet()) {
                final String propName = entry.getKey();
                final RecordPath recordPath = entry.getValue();
                final Stream<FieldValue> fieldValueStream = recordPath.evaluate(record).getSelectedFields();
                final List<ValueWrapper> fieldValues = fieldValueStream.map(fieldVal -> new ValueWrapper(fieldVal.getValue())).collect(Collectors.toList());
                recordMap.put(propName, fieldValues);
            }
            final RecordValueMap recordValueMap = new RecordValueMap(recordMap);
            // Get the RecordSetWriter that contains the same values for all RecordPaths - or create one if none exists.
            RecordSetWriter writer = writerMap.get(recordValueMap);
            if (writer == null) {
                final FlowFile childFlowFile = session.create(flowFile);
                recordValueMap.setFlowFile(childFlowFile);
                final OutputStream out = session.write(childFlowFile);
                writer = writerFactory.createWriter(getLogger(), writeSchema, out);
                writer.beginRecordSet();
                writerMap.put(recordValueMap, writer);
            }
            writer.write(record);
        }
        // For each RecordSetWriter, finish the record set and close the writer.
        for (final Map.Entry<RecordValueMap, RecordSetWriter> entry : writerMap.entrySet()) {
            final RecordValueMap valueMap = entry.getKey();
            final RecordSetWriter writer = entry.getValue();
            final WriteResult writeResult = writer.finishRecordSet();
            writer.close();
            final Map<String, String> attributes = new HashMap<>();
            attributes.putAll(valueMap.getAttributes());
            attributes.putAll(writeResult.getAttributes());
            attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
            FlowFile childFlowFile = valueMap.getFlowFile();
            childFlowFile = session.putAllAttributes(childFlowFile, attributes);
            // keep the latest FlowFile reference so the transfer at the end of the method uses the updated child flow file
            valueMap.setFlowFile(childFlowFile);
            session.adjustCounter("Record Processed", writeResult.getRecordCount(), false);
        }
    } catch (final Exception e) {
        for (final Map.Entry<RecordValueMap, RecordSetWriter> entry : writerMap.entrySet()) {
            final RecordValueMap valueMap = entry.getKey();
            final RecordSetWriter writer = entry.getValue();
            try {
                writer.close();
            } catch (final IOException e1) {
                getLogger().warn("Failed to close Record Writer for {}; some resources may not be cleaned up appropriately", new Object[] { flowFile, e1 });
            }
            session.remove(valueMap.getFlowFile());
        }
        getLogger().error("Failed to partition {}", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    // Transfer the child flow files only after all record sets have been completed successfully,
    // because we want to ensure that we are able to remove the child flowfiles in case of a failure.
    for (final RecordValueMap valueMap : writerMap.keySet()) {
        session.transfer(valueMap.getFlowFile(), REL_SUCCESS);
    }
    session.transfer(flowFile, REL_ORIGINAL);
}
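The helpers ValueWrapper and RecordValueMap referenced above are also not part of this snippet: ValueWrapper wraps a single RecordPath result so that equals and hashCode behave sensibly for array values, and RecordValueMap keys the writer map by those wrapped values while carrying the child FlowFile and the partition attributes. A rough sketch of what the ValueWrapper part might look like, assuming java.util.Arrays is imported (the actual inner class in PartitionRecord may differ):

    // Sketch of an equals/hashCode-friendly wrapper around a RecordPath result value,
    // so that records producing identical values land on the same RecordSetWriter.
    private static class ValueWrapper {

        private final Object value;

        public ValueWrapper(final Object value) {
            this.value = value;
        }

        @Override
        public int hashCode() {
            if (value == null) {
                return 31;
            }
            if (value instanceof Object[]) {
                return 31 + Arrays.deepHashCode((Object[]) value);
            }
            return 31 + value.hashCode();
        }

        @Override
        public boolean equals(final Object obj) {
            if (obj == this) {
                return true;
            }
            if (!(obj instanceof ValueWrapper)) {
                return false;
            }
            final ValueWrapper other = (ValueWrapper) obj;
            if (value == null || other.value == null) {
                return value == other.value;
            }
            if (value instanceof Object[] && other.value instanceof Object[]) {
                return Arrays.deepEquals((Object[]) value, (Object[]) other.value);
            }
            return value.equals(other.value);
        }
    }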
Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.
The class PutParquetTest, method testIOExceptionFromReaderShouldRouteToRetry.
@Test
public void testIOExceptionFromReaderShouldRouteToRetry() throws InitializationException, IOException, MalformedRecordException, SchemaNotFoundException {
    configure(proc, 10);
    final RecordSet recordSet = Mockito.mock(RecordSet.class);
    when(recordSet.next()).thenThrow(new IOException("ERROR"));
    final RecordReader recordReader = Mockito.mock(RecordReader.class);
    when(recordReader.createRecordSet()).thenReturn(recordSet);
    when(recordReader.getSchema()).thenReturn(AvroTypeUtil.createSchema(schema));
    final RecordReaderFactory readerFactory = Mockito.mock(RecordReaderFactory.class);
    when(readerFactory.getIdentifier()).thenReturn("mock-reader-factory");
    when(readerFactory.createRecordReader(any(FlowFile.class), any(InputStream.class), any(ComponentLog.class))).thenReturn(recordReader);
    testRunner.addControllerService("mock-reader-factory", readerFactory);
    testRunner.enableControllerService(readerFactory);
    testRunner.setProperty(PutParquet.RECORD_READER, "mock-reader-factory");
    final String filename = "testMalformedRecordExceptionShouldRouteToFailure-" + System.currentTimeMillis();
    final Map<String, String> flowFileAttributes = new HashMap<>();
    flowFileAttributes.put(CoreAttributes.FILENAME.key(), filename);
    testRunner.enqueue("trigger", flowFileAttributes);
    testRunner.run();
    testRunner.assertAllFlowFilesTransferred(PutParquet.REL_RETRY, 1);
}
Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.
The class PutParquetTest, method testMalformedRecordExceptionFromReaderShouldRouteToFailure.
@Test
public void testMalformedRecordExceptionFromReaderShouldRouteToFailure() throws InitializationException, IOException, MalformedRecordException, SchemaNotFoundException {
    configure(proc, 10);
    final RecordReader recordReader = Mockito.mock(RecordReader.class);
    when(recordReader.nextRecord()).thenThrow(new MalformedRecordException("ERROR"));
    final RecordReaderFactory readerFactory = Mockito.mock(RecordReaderFactory.class);
    when(readerFactory.getIdentifier()).thenReturn("mock-reader-factory");
    when(readerFactory.createRecordReader(any(FlowFile.class), any(InputStream.class), any(ComponentLog.class))).thenReturn(recordReader);
    testRunner.addControllerService("mock-reader-factory", readerFactory);
    testRunner.enableControllerService(readerFactory);
    testRunner.setProperty(PutParquet.RECORD_READER, "mock-reader-factory");
    final String filename = "testMalformedRecordExceptionShouldRouteToFailure-" + System.currentTimeMillis();
    final Map<String, String> flowFileAttributes = new HashMap<>();
    flowFileAttributes.put(CoreAttributes.FILENAME.key(), filename);
    testRunner.enqueue("trigger", flowFileAttributes);
    testRunner.run();
    testRunner.assertAllFlowFilesTransferred(PutParquet.REL_FAILURE, 1);
}
Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.
The class AbstractKudu, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    try {
        if (flowFile == null)
            return;
        final Map<String, String> attributes = new HashMap<String, String>();
        final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
        final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
        final KuduSession kuduSession = this.getKuduSession(kuduClient);
        session.read(flowFile, (final InputStream rawIn) -> {
            RecordReader recordReader = null;
            try (final BufferedInputStream in = new BufferedInputStream(rawIn)) {
                try {
                    recordReader = recordReaderFactory.createRecordReader(flowFile, in, getLogger());
                } catch (Exception ex) {
                    final RecordReaderFactoryException rrfe = new RecordReaderFactoryException("Unable to create RecordReader", ex);
                    exceptionHolder.set(rrfe);
                    return;
                }
                List<String> fieldNames = recordReader.getSchema().getFieldNames();
                final RecordSet recordSet = recordReader.createRecordSet();
                if (skipHeadLine)
                    recordSet.next();
                int numOfAddedRecord = 0;
                Record record = recordSet.next();
                while (record != null) {
                    org.apache.kudu.client.Operation oper = null;
                    if (operationType == OperationType.UPSERT) {
                        oper = upsertRecordToKudu(kuduTable, record, fieldNames);
                    } else {
                        oper = insertRecordToKudu(kuduTable, record, fieldNames);
                    }
                    kuduSession.apply(oper);
                    numOfAddedRecord++;
                    record = recordSet.next();
                }
                getLogger().info("KUDU: number of inserted records: " + numOfAddedRecord);
                attributes.put(RECORD_COUNT_ATTR, String.valueOf(numOfAddedRecord));
            } catch (KuduException ex) {
                getLogger().error("Exception occurred while interacting with Kudu due to " + ex.getMessage(), ex);
                exceptionHolder.set(ex);
            } catch (Exception e) {
                exceptionHolder.set(e);
            } finally {
                IOUtils.closeQuietly(recordReader);
            }
        });
        kuduSession.close();
        if (exceptionHolder.get() != null) {
            throw exceptionHolder.get();
        }
        // Update the flow file's attributes after the ingestion; the session returns a new
        // FlowFile reference, which must be used for the transfer and the provenance event.
        final FlowFile updatedFlowFile = session.putAllAttributes(flowFile, attributes);
        session.transfer(updatedFlowFile, REL_SUCCESS);
        session.getProvenanceReporter().send(updatedFlowFile, "Successfully added flowfile to kudu");
    } catch (IOException | FlowFileAccessException e) {
        getLogger().error("Failed to write due to {}", new Object[] { e });
        session.transfer(flowFile, REL_FAILURE);
    } catch (Throwable t) {
        getLogger().error("Failed to write due to {}", new Object[] { t });
        session.transfer(flowFile, REL_FAILURE);
    }
}
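The insertRecordToKudu and upsertRecordToKudu calls above are implemented by the concrete processor and are not shown here. As a rough illustration of what such a mapping can look like, the following hedged sketch builds a Kudu Insert from a NiFi Record using the Kudu client's PartialRow API (assumed imports: org.apache.kudu.ColumnSchema and org.apache.kudu.client.Insert, KuduTable, Operation, PartialRow). Only a few Kudu column types are handled, and this is not the project's actual implementation:

    // Simplified sketch: map NiFi Record fields onto a Kudu Insert (illustrative only).
    protected Operation insertRecordToKudu(final KuduTable table, final Record record, final List<String> fieldNames) {
        final Insert insert = table.newInsert();
        final PartialRow row = insert.getRow();
        for (final String fieldName : fieldNames) {
            final ColumnSchema column = table.getSchema().getColumn(fieldName);
            if (record.getValue(fieldName) == null) {
                row.setNull(fieldName);
                continue;
            }
            switch (column.getType()) {
                case STRING:
                    row.addString(fieldName, record.getAsString(fieldName));
                    break;
                case INT32:
                    row.addInt(fieldName, record.getAsInt(fieldName));
                    break;
                case INT64:
                    row.addLong(fieldName, record.getAsLong(fieldName));
                    break;
                case DOUBLE:
                    row.addDouble(fieldName, record.getAsDouble(fieldName));
                    break;
                case BOOL:
                    row.addBoolean(fieldName, record.getAsBoolean(fieldName));
                    break;
                default:
                    // the remaining Kudu types are intentionally not covered in this sketch
                    throw new IllegalArgumentException("Unhandled Kudu column type: " + column.getType());
            }
        }
        return insert;
    }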