Use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.
The class QueryRecord, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }
    final StopWatch stopWatch = new StopWatch(true);
    final RecordSetWriterFactory recordSetWriterFactory = context.getProperty(RECORD_WRITER_FACTORY).asControllerService(RecordSetWriterFactory.class);
    final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);
    final Map<FlowFile, Relationship> transformedFlowFiles = new HashMap<>();
    final Set<FlowFile> createdFlowFiles = new HashSet<>();
    // Determine the Record Reader's schema
    final RecordSchema readerSchema;
    try (final InputStream rawIn = session.read(original)) {
        final Map<String, String> originalAttributes = original.getAttributes();
        final RecordReader reader = recordReaderFactory.createRecordReader(originalAttributes, rawIn, getLogger());
        final RecordSchema inputSchema = reader.getSchema();
        readerSchema = recordSetWriterFactory.getSchema(originalAttributes, inputSchema);
    } catch (final Exception e) {
        getLogger().error("Failed to determine Record Schema from {}; routing to failure", new Object[] { original, e });
        session.transfer(original, REL_FAILURE);
        return;
    }
    // Determine the schema for writing the data
    final Map<String, String> originalAttributes = original.getAttributes();
    int recordsRead = 0;
    try {
        for (final PropertyDescriptor descriptor : context.getProperties().keySet()) {
            if (!descriptor.isDynamic()) {
                continue;
            }
            final Relationship relationship = new Relationship.Builder().name(descriptor.getName()).build();
            // We have to fork a child because we may need to read the input FlowFile more than once,
            // and we cannot call session.read() on the original FlowFile while we are within a write
            // callback for the original FlowFile.
            FlowFile transformed = session.create(original);
            boolean flowFileRemoved = false;
            try {
                final String sql = context.getProperty(descriptor).evaluateAttributeExpressions(original).getValue();
                final AtomicReference<WriteResult> writeResultRef = new AtomicReference<>();
                final QueryResult queryResult;
                if (context.getProperty(CACHE_SCHEMA).asBoolean()) {
                    queryResult = queryWithCache(session, original, sql, context, recordReaderFactory);
                } else {
                    queryResult = query(session, original, sql, context, recordReaderFactory);
                }
                final AtomicReference<String> mimeTypeRef = new AtomicReference<>();
                try {
                    final ResultSet rs = queryResult.getResultSet();
                    transformed = session.write(transformed, new OutputStreamCallback() {
                        @Override
                        public void process(final OutputStream out) throws IOException {
                            final ResultSetRecordSet recordSet;
                            final RecordSchema writeSchema;
                            try {
                                recordSet = new ResultSetRecordSet(rs, readerSchema);
                                final RecordSchema resultSetSchema = recordSet.getSchema();
                                writeSchema = recordSetWriterFactory.getSchema(originalAttributes, resultSetSchema);
                            } catch (final SQLException | SchemaNotFoundException e) {
                                throw new ProcessException(e);
                            }
                            try (final RecordSetWriter resultSetWriter = recordSetWriterFactory.createWriter(getLogger(), writeSchema, out)) {
                                writeResultRef.set(resultSetWriter.write(recordSet));
                                mimeTypeRef.set(resultSetWriter.getMimeType());
                            } catch (final Exception e) {
                                throw new IOException(e);
                            }
                        }
                    });
                } finally {
                    closeQuietly(queryResult);
                }
                recordsRead = Math.max(recordsRead, queryResult.getRecordsRead());
                final WriteResult result = writeResultRef.get();
                if (result.getRecordCount() == 0 && !context.getProperty(INCLUDE_ZERO_RECORD_FLOWFILES).asBoolean()) {
                    session.remove(transformed);
                    flowFileRemoved = true;
                    transformedFlowFiles.remove(transformed);
                    getLogger().info("Transformed {} but the result contained no data so will not pass on a FlowFile", new Object[] { original });
                } else {
                    final Map<String, String> attributesToAdd = new HashMap<>();
                    if (result.getAttributes() != null) {
                        attributesToAdd.putAll(result.getAttributes());
                    }
                    attributesToAdd.put(CoreAttributes.MIME_TYPE.key(), mimeTypeRef.get());
                    attributesToAdd.put("record.count", String.valueOf(result.getRecordCount()));
                    transformed = session.putAllAttributes(transformed, attributesToAdd);
                    transformedFlowFiles.put(transformed, relationship);
                    session.adjustCounter("Records Written", result.getRecordCount(), false);
                }
            } finally {
                // Ensure that we have the FlowFile in the set in case we throw any Exception
                if (!flowFileRemoved) {
                    createdFlowFiles.add(transformed);
                }
            }
        }
        final long elapsedMillis = stopWatch.getElapsed(TimeUnit.MILLISECONDS);
        if (transformedFlowFiles.size() > 0) {
            session.getProvenanceReporter().fork(original, transformedFlowFiles.keySet(), elapsedMillis);
            for (final Map.Entry<FlowFile, Relationship> entry : transformedFlowFiles.entrySet()) {
                final FlowFile transformed = entry.getKey();
                final Relationship relationship = entry.getValue();
                session.getProvenanceReporter().route(transformed, relationship);
                session.transfer(transformed, relationship);
            }
        }
        getLogger().info("Successfully queried {} in {} millis", new Object[] { original, elapsedMillis });
        session.transfer(original, REL_ORIGINAL);
    } catch (final SQLException e) {
        getLogger().error("Unable to query {} due to {}", new Object[] { original, e.getCause() == null ? e : e.getCause() });
        session.remove(createdFlowFiles);
        session.transfer(original, REL_FAILURE);
    } catch (final Exception e) {
        getLogger().error("Unable to query {} due to {}", new Object[] { original, e });
        session.remove(createdFlowFiles);
        session.transfer(original, REL_FAILURE);
    }
    session.adjustCounter("Records Read", recordsRead, false);
}
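
For orientation, the snippet below is a minimal sketch of the reader-side pattern the processor above relies on, reduced to its core: resolve the RecordReaderFactory controller service from a property, open the FlowFile's content, and pull records until nextRecord() returns null. It is not code from the NiFi repository; it assumes it sits inside a processor's onTrigger with a RECORD_READER_FACTORY property descriptor and a REL_FAILURE relationship, mirroring the names used above.

// Minimal sketch (assumed names: RECORD_READER_FACTORY, REL_FAILURE) of the common read loop.
final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER_FACTORY)
        .asControllerService(RecordReaderFactory.class);

try (final InputStream in = session.read(flowFile);
        final RecordReader reader = readerFactory.createRecordReader(flowFile.getAttributes(), in, getLogger())) {
    Record record;
    while ((record = reader.nextRecord()) != null) {
        // handle each record here (transform, route, count, ...)
    }
} catch (final Exception e) {
    getLogger().error("Failed to read records from {}", new Object[] { flowFile, e });
    session.transfer(flowFile, REL_FAILURE);
    return;
}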
Use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.
The class TestListenTCPRecord, method setup.
@Before
public void setup() throws InitializationException {
    proc = new ListenTCPRecord();
    runner = TestRunners.newTestRunner(proc);
    runner.setProperty(ListenTCPRecord.PORT, "0");

    final String readerId = "record-reader";
    final RecordReaderFactory readerFactory = new JsonTreeReader();
    runner.addControllerService(readerId, readerFactory);
    runner.setProperty(readerFactory, SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.SCHEMA_TEXT_PROPERTY.getValue());
    runner.setProperty(readerFactory, SchemaAccessUtils.SCHEMA_TEXT, SCHEMA_TEXT);
    runner.enableControllerService(readerFactory);

    final String writerId = "record-writer";
    final RecordSetWriterFactory writerFactory = new MockRecordWriter("timestamp, logsource, message");
    runner.addControllerService(writerId, writerFactory);
    runner.enableControllerService(writerFactory);

    runner.setProperty(ListenTCPRecord.RECORD_READER, readerId);
    runner.setProperty(ListenTCPRecord.RECORD_WRITER, writerId);
}
Use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.
The class TestListenUDPRecord, method setup.
@Before
public void setup() throws InitializationException {
    proc = new TestableListenUDPRecord();
    runner = TestRunners.newTestRunner(proc);
    runner.setProperty(ListenUDP.PORT, "1");

    final String readerId = "record-reader";
    final RecordReaderFactory readerFactory = new JsonTreeReader();
    runner.addControllerService(readerId, readerFactory);
    runner.setProperty(readerFactory, SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.SCHEMA_TEXT_PROPERTY.getValue());
    runner.setProperty(readerFactory, SchemaAccessUtils.SCHEMA_TEXT, SCHEMA_TEXT);
    runner.enableControllerService(readerFactory);

    final String writerId = "record-writer";
    mockRecordWriter = new MockRecordWriter("timestamp, logsource, message");
    runner.addControllerService(writerId, mockRecordWriter);
    runner.enableControllerService(mockRecordWriter);

    runner.setProperty(ListenUDPRecord.RECORD_READER, readerId);
    runner.setProperty(ListenUDPRecord.RECORD_WRITER, writerId);
}
Use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.
The class TestConsumeAzureEventHub, method setupRecordReader.
private void setupRecordReader(List<EventData> eventDataList, int throwExceptionAt, String writeFailureWith) throws MalformedRecordException, IOException, SchemaNotFoundException {
    final RecordReaderFactory readerFactory = mock(RecordReaderFactory.class);
    processor.setReaderFactory(readerFactory);
    final RecordReader reader = mock(RecordReader.class);
    when(readerFactory.createRecordReader(anyMap(), any(), any())).thenReturn(reader);
    final List<Record> recordList = eventDataList.stream()
            .map(eventData -> toRecord(new String(eventData.getBytes())))
            .collect(Collectors.toList());
    // Add null to indicate the end of records.
    final Function<List<Record>, List<Record>> addEndRecord = rs -> rs.stream()
            .flatMap(r -> r.getAsString("value").equals(writeFailureWith) ? Stream.of(r) : Stream.of(r, null))
            .collect(Collectors.toList());
    final List<Record> recordSetList = addEndRecord.apply(recordList);
    final Record[] records = recordSetList.toArray(new Record[recordSetList.size()]);
    switch (throwExceptionAt) {
        case -1:
            when(reader.nextRecord()).thenReturn(records[0], Arrays.copyOfRange(records, 1, records.length));
            break;
        case 0:
            when(reader.nextRecord())
                    .thenThrow(new MalformedRecordException("Simulating Record parse failure."))
                    .thenReturn(records[0], Arrays.copyOfRange(records, 1, records.length));
            break;
        default:
            final List<Record> recordList1 = addEndRecord.apply(recordList.subList(0, throwExceptionAt));
            final List<Record> recordList2 = addEndRecord.apply(recordList.subList(throwExceptionAt + 1, recordList.size()));
            final Record[] records1 = recordList1.toArray(new Record[recordList1.size()]);
            final Record[] records2 = recordList2.toArray(new Record[recordList2.size()]);
            when(reader.nextRecord())
                    .thenReturn(records1[0], Arrays.copyOfRange(records1, 1, records1.length))
                    .thenThrow(new MalformedRecordException("Simulating Record parse failure."))
                    .thenReturn(records2[0], Arrays.copyOfRange(records2, 1, records2.length));
    }
}
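
The stubbing above leans on Mockito's varargs thenReturn(first, rest...), which queues consecutive return values for nextRecord(); the interleaved nulls are what the consuming code interprets as the end of a record set. Below is a stripped-down, hypothetical illustration of that mechanism, not taken from the NiFi tests, assuming the enclosing test method declares the reader's checked exceptions.

// Consecutive calls return the queued values in order, then keep returning the last one (null).
final RecordReader reader = mock(RecordReader.class);
final Record first = mock(Record.class);
final Record second = mock(Record.class);
when(reader.nextRecord()).thenReturn(first, second, null);

assertSame(first, reader.nextRecord());
assertSame(second, reader.nextRecord());
assertNull(reader.nextRecord());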
Use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.
The class PutHBaseRecord, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordReaderFactory recordParserFactory = context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);
    List<PutFlowFile> flowFiles = new ArrayList<>();
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String rowFieldName = context.getProperty(ROW_FIELD_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String columnFamily = context.getProperty(COLUMN_FAMILY).evaluateAttributeExpressions(flowFile).getValue();
    final String timestampFieldName = context.getProperty(TIMESTAMP_FIELD_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String fieldEncodingStrategy = context.getProperty(FIELD_ENCODING_STRATEGY).getValue();
    final String complexFieldStrategy = context.getProperty(COMPLEX_FIELD_STRATEGY).getValue();
    final String rowEncodingStrategy = context.getProperty(ROW_ID_ENCODING_STRATEGY).getValue();
    final long start = System.nanoTime();
    int index = 0;
    int columns = 0;
    boolean failed = false;
    String startIndexStr = flowFile.getAttribute("restart.index");
    int startIndex = -1;
    if (startIndexStr != null) {
        startIndex = Integer.parseInt(startIndexStr);
    }
    PutFlowFile last = null;
    try (final InputStream in = session.read(flowFile);
         final RecordReader reader = recordParserFactory.createRecordReader(flowFile, in, getLogger())) {
        Record record;
        if (startIndex >= 0) {
            while (index++ < startIndex && (reader.nextRecord()) != null) {
            }
        }
        while ((record = reader.nextRecord()) != null) {
            PutFlowFile putFlowFile = createPut(context, record, reader.getSchema(), flowFile, rowFieldName, columnFamily,
                    timestampFieldName, fieldEncodingStrategy, rowEncodingStrategy, complexFieldStrategy);
            if (putFlowFile.getColumns().size() == 0) {
                continue;
            }
            flowFiles.add(putFlowFile);
            index++;
            if (flowFiles.size() == batchSize) {
                columns += addBatch(tableName, flowFiles);
                last = flowFiles.get(flowFiles.size() - 1);
                flowFiles = new ArrayList<>();
            }
        }
        if (flowFiles.size() > 0) {
            columns += addBatch(tableName, flowFiles);
            last = flowFiles.get(flowFiles.size() - 1);
        }
    } catch (Exception ex) {
        getLogger().error("Failed to put records to HBase.", ex);
        failed = true;
    }
    if (!failed) {
        if (columns > 0) {
            sendProvenance(session, flowFile, columns, System.nanoTime() - start, last);
        }
        flowFile = session.removeAttribute(flowFile, "restart.index");
        session.transfer(flowFile, REL_SUCCESS);
    } else {
        String restartIndex = Integer.toString(index - flowFiles.size());
        flowFile = session.putAttribute(flowFile, "restart.index", restartIndex);
        if (columns > 0) {
            sendProvenance(session, flowFile, columns, System.nanoTime() - start, last);
        }
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
    }
    session.commit();
}