use of org.apache.nifi.serialization.RecordSetWriterFactory in project nifi by apache.
the class TestPublisherLease method testRecordsSentToRecordWriterAndThenToProducer.
@Test
public void testRecordsSentToRecordWriterAndThenToProducer() throws IOException, SchemaNotFoundException, MalformedRecordException {
final PublisherLease lease = new PublisherLease(producer, 1024 * 1024, 10L, logger, true, null, StandardCharsets.UTF_8);
final FlowFile flowFile = new MockFlowFile(1L);
final byte[] exampleInput = "101, John Doe, 48\n102, Jane Doe, 47".getBytes(StandardCharsets.UTF_8);
final MockRecordParser readerService = new MockRecordParser();
readerService.addSchemaField("person_id", RecordFieldType.LONG);
readerService.addSchemaField("name", RecordFieldType.STRING);
readerService.addSchemaField("age", RecordFieldType.INT);
final RecordReader reader = readerService.createRecordReader(Collections.emptyMap(), new ByteArrayInputStream(exampleInput), logger);
final RecordSet recordSet = reader.createRecordSet();
final RecordSchema schema = reader.getSchema();
final String topic = "unit-test";
final String keyField = "person_id";
final RecordSetWriterFactory writerFactory = Mockito.mock(RecordSetWriterFactory.class);
final RecordSetWriter writer = Mockito.mock(RecordSetWriter.class);
Mockito.when(writer.write(Mockito.any(Record.class))).thenReturn(WriteResult.of(1, Collections.emptyMap()));
Mockito.when(writerFactory.createWriter(eq(logger), eq(schema), any())).thenReturn(writer);
lease.publish(flowFile, recordSet, writerFactory, schema, keyField, topic);
verify(writerFactory, times(2)).createWriter(eq(logger), eq(schema), any());
verify(writer, times(2)).write(any(Record.class));
verify(producer, times(2)).send(any(), any());
}
use of org.apache.nifi.serialization.RecordSetWriterFactory in project nifi by apache.
the class FetchParquetTest method configure.
private void configure(final FetchParquet fetchParquet) throws InitializationException {
testRunner = TestRunners.newTestRunner(fetchParquet);
testRunner.setProperty(FetchParquet.HADOOP_CONFIGURATION_RESOURCES, TEST_CONF_PATH);
final RecordSetWriterFactory writerFactory = new MockRecordWriter(RECORD_HEADER, false);
testRunner.addControllerService("mock-writer-factory", writerFactory);
testRunner.enableControllerService(writerFactory);
testRunner.setProperty(FetchParquet.RECORD_WRITER, "mock-writer-factory");
}
use of org.apache.nifi.serialization.RecordSetWriterFactory in project nifi by apache.
the class FetchParquetTest method testIOExceptionWhileWritingShouldRouteToRetry.
@Test
public void testIOExceptionWhileWritingShouldRouteToRetry() throws InitializationException, IOException, SchemaNotFoundException {
configure(proc);
final RecordSetWriter recordSetWriter = Mockito.mock(RecordSetWriter.class);
when(recordSetWriter.write(any(Record.class))).thenThrow(new IOException("IOException"));
final RecordSetWriterFactory recordSetWriterFactory = Mockito.mock(RecordSetWriterFactory.class);
when(recordSetWriterFactory.getIdentifier()).thenReturn("mock-writer-factory");
when(recordSetWriterFactory.createWriter(any(ComponentLog.class), any(RecordSchema.class), any(OutputStream.class))).thenReturn(recordSetWriter);
testRunner.addControllerService("mock-writer-factory", recordSetWriterFactory);
testRunner.enableControllerService(recordSetWriterFactory);
testRunner.setProperty(FetchParquet.RECORD_WRITER, "mock-writer-factory");
final File parquetDir = new File(DIRECTORY);
final File parquetFile = new File(parquetDir, "testFetchParquetToCSV.parquet");
final int numUsers = 10;
writeParquetUsers(parquetFile, numUsers);
final Map<String, String> attributes = new HashMap<>();
attributes.put(CoreAttributes.PATH.key(), parquetDir.getAbsolutePath());
attributes.put(CoreAttributes.FILENAME.key(), parquetFile.getName());
testRunner.enqueue("TRIGGER", attributes);
testRunner.run();
testRunner.assertAllFlowFilesTransferred(FetchParquet.REL_RETRY, 1);
final MockFlowFile flowFile = testRunner.getFlowFilesForRelationship(FetchParquet.REL_RETRY).get(0);
flowFile.assertContentEquals("TRIGGER");
}
use of org.apache.nifi.serialization.RecordSetWriterFactory in project nifi by apache.
the class ConsumeKafkaRecord_1_0 method createConsumerPool.
protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) {
final int maxLeases = context.getMaxConcurrentTasks();
final long maxUncommittedTime = context.getProperty(MAX_UNCOMMITTED_TIME).asTimePeriod(TimeUnit.MILLISECONDS);
final Map<String, Object> props = new HashMap<>();
KafkaProcessorUtils.buildCommonKafkaProperties(context, ConsumerConfig.class, props);
props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
final String topicListing = context.getProperty(ConsumeKafkaRecord_1_0.TOPICS).evaluateAttributeExpressions().getValue();
final String topicType = context.getProperty(ConsumeKafkaRecord_1_0.TOPIC_TYPE).evaluateAttributeExpressions().getValue();
final List<String> topics = new ArrayList<>();
final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue();
final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
final boolean honorTransactions = context.getProperty(HONOR_TRANSACTIONS).asBoolean();
final String charsetName = context.getProperty(MESSAGE_HEADER_ENCODING).evaluateAttributeExpressions().getValue();
final Charset charset = Charset.forName(charsetName);
final String headerNameRegex = context.getProperty(HEADER_NAME_REGEX).getValue();
final Pattern headerNamePattern = headerNameRegex == null ? null : Pattern.compile(headerNameRegex);
if (topicType.equals(TOPIC_NAME.getValue())) {
for (final String topic : topicListing.split(",", 100)) {
final String trimmedName = topic.trim();
if (!trimmedName.isEmpty()) {
topics.add(trimmedName);
}
}
return new ConsumerPool(maxLeases, readerFactory, writerFactory, props, topics, maxUncommittedTime, securityProtocol, bootstrapServers, log, honorTransactions, charset, headerNamePattern);
} else if (topicType.equals(TOPIC_PATTERN.getValue())) {
final Pattern topicPattern = Pattern.compile(topicListing.trim());
return new ConsumerPool(maxLeases, readerFactory, writerFactory, props, topicPattern, maxUncommittedTime, securityProtocol, bootstrapServers, log, honorTransactions, charset, headerNamePattern);
} else {
getLogger().error("Subscription type has an unknown value {}", new Object[] { topicType });
return null;
}
}
use of org.apache.nifi.serialization.RecordSetWriterFactory in project nifi by apache.
the class SplitRecord method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
FlowFile original = session.get();
if (original == null) {
return;
}
final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
final int maxRecords = context.getProperty(RECORDS_PER_SPLIT).evaluateAttributeExpressions(original).asInteger();
final List<FlowFile> splits = new ArrayList<>();
final Map<String, String> originalAttributes = original.getAttributes();
try {
session.read(original, new InputStreamCallback() {
@Override
public void process(final InputStream in) throws IOException {
try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
final RecordSchema schema = writerFactory.getSchema(originalAttributes, reader.getSchema());
final RecordSet recordSet = reader.createRecordSet();
final PushBackRecordSet pushbackSet = new PushBackRecordSet(recordSet);
while (pushbackSet.isAnotherRecord()) {
FlowFile split = session.create(original);
try {
final Map<String, String> attributes = new HashMap<>();
final WriteResult writeResult;
try (final OutputStream out = session.write(split);
final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out)) {
if (maxRecords == 1) {
final Record record = pushbackSet.next();
writeResult = writer.write(record);
} else {
final RecordSet limitedSet = pushbackSet.limit(maxRecords);
writeResult = writer.write(limitedSet);
}
attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
attributes.putAll(writeResult.getAttributes());
session.adjustCounter("Records Split", writeResult.getRecordCount(), false);
}
split = session.putAllAttributes(split, attributes);
} finally {
splits.add(split);
}
}
} catch (final SchemaNotFoundException | MalformedRecordException e) {
throw new ProcessException("Failed to parse incoming data", e);
}
}
});
} catch (final ProcessException pe) {
getLogger().error("Failed to split {}", new Object[] { original, pe });
session.remove(splits);
session.transfer(original, REL_FAILURE);
return;
}
session.transfer(original, REL_ORIGINAL);
session.transfer(splits, REL_SPLITS);
getLogger().info("Successfully split {} into {} FlowFiles, each containing up to {} records", new Object[] { original, splits.size(), maxRecords });
}
Aggregations