Use of org.apache.nifi.serialization.RecordReader in the apache/nifi project.
From the class FlowFileTable, method getRowType:
@Override
public RelDataType getRowType(final RelDataTypeFactory typeFactory) {
    // Return the cached row type if it has already been derived.
    if (relDataType != null) {
        return relDataType;
    }

    // Read the FlowFile content once with the configured Record Reader just to obtain its schema.
    RecordSchema schema;
    try (final InputStream in = session.read(flowFile)) {
        final RecordReader recordParser = recordParserFactory.createRecordReader(flowFile, in, logger);
        schema = recordParser.getSchema();
    } catch (final Exception e) {
        throw new ProcessException("Failed to determine schema of data records for " + flowFile, e);
    }

    // Translate each record field into a Calcite column name and type.
    final List<String> names = new ArrayList<>();
    final List<RelDataType> types = new ArrayList<>();
    final JavaTypeFactory javaTypeFactory = (JavaTypeFactory) typeFactory;
    for (final RecordField field : schema.getFields()) {
        names.add(field.getFieldName());
        types.add(getRelDataType(field.getDataType(), javaTypeFactory));
    }

    logger.debug("Found Schema: {}", new Object[] { schema });

    if (recordSchema == null) {
        recordSchema = schema;
    }

    relDataType = typeFactory.createStructType(Pair.zip(names, types));
    return relDataType;
}
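The getRelDataType helper referenced above is not shown in this snippet. A minimal, hypothetical sketch of the kind of mapping it performs, assuming NiFi's RecordFieldType enum and Calcite's JavaTypeFactory (the real NiFi implementation covers more types, such as dates, arrays, and records), might look like:

private RelDataType getRelDataType(final DataType fieldType, final JavaTypeFactory typeFactory) {
    // Map NiFi record field types to Calcite Java types; illustrative only.
    switch (fieldType.getFieldType()) {
        case BOOLEAN:
            return typeFactory.createJavaType(boolean.class);
        case INT:
            return typeFactory.createJavaType(int.class);
        case LONG:
            return typeFactory.createJavaType(long.class);
        case DOUBLE:
            return typeFactory.createJavaType(double.class);
        case STRING:
            return typeFactory.createJavaType(String.class);
        default:
            return typeFactory.createJavaType(Object.class);
    }
}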
Use of org.apache.nifi.serialization.RecordReader in the apache/nifi project.
From the class TestConsumeAzureEventHub, method setupRecordReader:
private void setupRecordReader(List<EventData> eventDataList, int throwExceptionAt, String writeFailureWith)
        throws MalformedRecordException, IOException, SchemaNotFoundException {
    final RecordReaderFactory readerFactory = mock(RecordReaderFactory.class);
    processor.setReaderFactory(readerFactory);
    final RecordReader reader = mock(RecordReader.class);
    when(readerFactory.createRecordReader(anyMap(), any(), any())).thenReturn(reader);

    final List<Record> recordList = eventDataList.stream()
            .map(eventData -> toRecord(new String(eventData.getBytes())))
            .collect(Collectors.toList());

    // Add null to indicate the end of records.
    final Function<List<Record>, List<Record>> addEndRecord = rs -> rs.stream()
            .flatMap(r -> r.getAsString("value").equals(writeFailureWith) ? Stream.of(r) : Stream.of(r, null))
            .collect(Collectors.toList());

    final List<Record> recordSetList = addEndRecord.apply(recordList);
    final Record[] records = recordSetList.toArray(new Record[recordSetList.size()]);

    // throwExceptionAt controls where a MalformedRecordException is injected:
    // -1 = never, 0 = on the first read, otherwise after that many records.
    switch (throwExceptionAt) {
        case -1:
            when(reader.nextRecord())
                    .thenReturn(records[0], Arrays.copyOfRange(records, 1, records.length));
            break;
        case 0:
            when(reader.nextRecord())
                    .thenThrow(new MalformedRecordException("Simulating Record parse failure."))
                    .thenReturn(records[0], Arrays.copyOfRange(records, 1, records.length));
            break;
        default:
            final List<Record> recordList1 = addEndRecord.apply(recordList.subList(0, throwExceptionAt));
            final List<Record> recordList2 = addEndRecord.apply(recordList.subList(throwExceptionAt + 1, recordList.size()));
            final Record[] records1 = recordList1.toArray(new Record[recordList1.size()]);
            final Record[] records2 = recordList2.toArray(new Record[recordList2.size()]);
            when(reader.nextRecord())
                    .thenReturn(records1[0], Arrays.copyOfRange(records1, 1, records1.length))
                    .thenThrow(new MalformedRecordException("Simulating Record parse failure."))
                    .thenReturn(records2[0], Arrays.copyOfRange(records2, 1, records2.length));
    }
}
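The mock above stubs nextRecord() so that it yields each record followed by a null (ending one record set) and, depending on throwExceptionAt, throws a MalformedRecordException at a chosen position. A minimal sketch of the consumption loop such a mock is meant to drive; the names readerFactory, flowFileAttributes, in, and logger are illustrative assumptions, not the processor's actual fields:

try (final RecordReader reader = readerFactory.createRecordReader(flowFileAttributes, in, logger)) {
    Record record;
    while ((record = reader.nextRecord()) != null) {
        // Each stubbed null ends one batch; a real processor would write the
        // record out here, e.g. through a RecordSetWriter.
    }
} catch (final Exception e) {
    // The thenThrow(...) stub above exercises this failure path.
}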
Use of org.apache.nifi.serialization.RecordReader in the apache/nifi project.
From the class PutHBaseRecord, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final RecordReaderFactory recordParserFactory = context.getProperty(RECORD_READER_FACTORY)
            .asControllerService(RecordReaderFactory.class);
    List<PutFlowFile> flowFiles = new ArrayList<>();
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String rowFieldName = context.getProperty(ROW_FIELD_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String columnFamily = context.getProperty(COLUMN_FAMILY).evaluateAttributeExpressions(flowFile).getValue();
    final String timestampFieldName = context.getProperty(TIMESTAMP_FIELD_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String fieldEncodingStrategy = context.getProperty(FIELD_ENCODING_STRATEGY).getValue();
    final String complexFieldStrategy = context.getProperty(COMPLEX_FIELD_STRATEGY).getValue();
    final String rowEncodingStrategy = context.getProperty(ROW_ID_ENCODING_STRATEGY).getValue();

    final long start = System.nanoTime();
    int index = 0;
    int columns = 0;
    boolean failed = false;

    // If a previous attempt failed part-way through, "restart.index" records how many
    // records were already written so they can be skipped on retry.
    String startIndexStr = flowFile.getAttribute("restart.index");
    int startIndex = -1;
    if (startIndexStr != null) {
        startIndex = Integer.parseInt(startIndexStr);
    }

    PutFlowFile last = null;
    try (final InputStream in = session.read(flowFile);
         final RecordReader reader = recordParserFactory.createRecordReader(flowFile, in, getLogger())) {
        Record record;

        // Skip records that were already written on a previous, partially successful attempt.
        if (startIndex >= 0) {
            while (index++ < startIndex && (reader.nextRecord()) != null) {
            }
        }

        while ((record = reader.nextRecord()) != null) {
            PutFlowFile putFlowFile = createPut(context, record, reader.getSchema(), flowFile, rowFieldName,
                    columnFamily, timestampFieldName, fieldEncodingStrategy, rowEncodingStrategy, complexFieldStrategy);
            if (putFlowFile.getColumns().size() == 0) {
                continue;
            }
            flowFiles.add(putFlowFile);
            index++;

            if (flowFiles.size() == batchSize) {
                columns += addBatch(tableName, flowFiles);
                last = flowFiles.get(flowFiles.size() - 1);
                flowFiles = new ArrayList<>();
            }
        }

        // Flush any remaining puts that did not fill a complete batch.
        if (flowFiles.size() > 0) {
            columns += addBatch(tableName, flowFiles);
            last = flowFiles.get(flowFiles.size() - 1);
        }
    } catch (Exception ex) {
        getLogger().error("Failed to put records to HBase.", ex);
        failed = true;
    }

    if (!failed) {
        if (columns > 0) {
            sendProvenance(session, flowFile, columns, System.nanoTime() - start, last);
        }
        flowFile = session.removeAttribute(flowFile, "restart.index");
        session.transfer(flowFile, REL_SUCCESS);
    } else {
        String restartIndex = Integer.toString(index - flowFiles.size());
        flowFile = session.putAttribute(flowFile, "restart.index", restartIndex);
        if (columns > 0) {
            sendProvenance(session, flowFile, columns, System.nanoTime() - start, last);
        }
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
    }

    session.commit();
}
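On failure, the processor stores a "restart.index" attribute reflecting how many records were already written, so the skip loop at the top of the method can resume past them on a retry; on success the attribute is removed. The addBatch helper used above is not shown. A hypothetical sketch of what such a helper could do, assuming the processor's HBaseClientService controller service is available as clientService (the actual PutHBaseRecord implementation may differ):

private int addBatch(final String tableName, final List<PutFlowFile> flowFiles) throws IOException {
    // Send the buffered puts to HBase in one call, then report how many
    // columns were written so provenance can be emitted.
    clientService.put(tableName, flowFiles);
    int columns = 0;
    for (final PutFlowFile putFlowFile : flowFiles) {
        columns += putFlowFile.getColumns().size();
    }
    return columns;
}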