
Example 6 with RecordSetWriterFactory

Use of org.apache.nifi.serialization.RecordSetWriterFactory in the Apache NiFi project.

From the class TestPublisherLease, method testRecordsSentToRecordWriterAndThenToProducer:

@Test
public void testRecordsSentToRecordWriterAndThenToProducer() throws IOException, SchemaNotFoundException, MalformedRecordException {
    final PublisherLease lease = new PublisherLease(producer, 1024 * 1024, 10L, logger, true, null, StandardCharsets.UTF_8);
    final FlowFile flowFile = new MockFlowFile(1L);
    final byte[] exampleInput = "101, John Doe, 48\n102, Jane Doe, 47".getBytes(StandardCharsets.UTF_8);
    final MockRecordParser readerService = new MockRecordParser();
    readerService.addSchemaField("person_id", RecordFieldType.LONG);
    readerService.addSchemaField("name", RecordFieldType.STRING);
    readerService.addSchemaField("age", RecordFieldType.INT);
    final RecordReader reader = readerService.createRecordReader(Collections.emptyMap(), new ByteArrayInputStream(exampleInput), logger);
    final RecordSet recordSet = reader.createRecordSet();
    final RecordSchema schema = reader.getSchema();
    final String topic = "unit-test";
    final String keyField = "person_id";
    final RecordSetWriterFactory writerFactory = Mockito.mock(RecordSetWriterFactory.class);
    final RecordSetWriter writer = Mockito.mock(RecordSetWriter.class);
    Mockito.when(writer.write(Mockito.any(Record.class))).thenReturn(WriteResult.of(1, Collections.emptyMap()));
    Mockito.when(writerFactory.createWriter(eq(logger), eq(schema), any())).thenReturn(writer);
    lease.publish(flowFile, recordSet, writerFactory, schema, keyField, topic);
    verify(writerFactory, times(2)).createWriter(eq(logger), eq(schema), any());
    verify(writer, times(2)).write(any(Record.class));
    verify(producer, times(2)).send(any(), any());
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), MockFlowFile (org.apache.nifi.util.MockFlowFile), RecordReader (org.apache.nifi.serialization.RecordReader), RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter), RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory), ByteArrayInputStream (java.io.ByteArrayInputStream), ProducerRecord (org.apache.kafka.clients.producer.ProducerRecord), Record (org.apache.nifi.serialization.record.Record), RecordSet (org.apache.nifi.serialization.record.RecordSet), RecordSchema (org.apache.nifi.serialization.record.RecordSchema), MockRecordParser (org.apache.nifi.processors.kafka.pubsub.util.MockRecordParser), Test (org.junit.Test)
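
As a hedged extension of the verification above, an ArgumentCaptor can confirm that both ProducerRecords handed to the mocked producer target the configured topic. This is a minimal sketch assuming the same test fixture and the usual static imports (verify, times, any, assertEquals); the key and value encoding is left unasserted because it is internal to PublisherLease.

// Illustrative only: capture the ProducerRecords passed to the mocked producer.
final ArgumentCaptor<ProducerRecord> captor = ArgumentCaptor.forClass(ProducerRecord.class);
verify(producer, times(2)).send(captor.capture(), any());
for (final ProducerRecord<?, ?> sent : captor.getAllValues()) {
    // Each record should have been published to the topic passed to lease.publish(...).
    assertEquals("unit-test", sent.topic());
}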

Example 7 with RecordSetWriterFactory

Use of org.apache.nifi.serialization.RecordSetWriterFactory in the Apache NiFi project.

From the class FetchParquetTest, method configure:

private void configure(final FetchParquet fetchParquet) throws InitializationException {
    testRunner = TestRunners.newTestRunner(fetchParquet);
    testRunner.setProperty(FetchParquet.HADOOP_CONFIGURATION_RESOURCES, TEST_CONF_PATH);
    final RecordSetWriterFactory writerFactory = new MockRecordWriter(RECORD_HEADER, false);
    testRunner.addControllerService("mock-writer-factory", writerFactory);
    testRunner.enableControllerService(writerFactory);
    testRunner.setProperty(FetchParquet.RECORD_WRITER, "mock-writer-factory");
}
Also used: RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory), MockRecordWriter (org.apache.nifi.serialization.record.MockRecordWriter)
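
A minimal usage sketch for the runner configured above: enqueue a trigger FlowFile whose path and filename attributes point at a Parquet file, run, and assert the success routing. The file location here is hypothetical, and FetchParquet.REL_SUCCESS is assumed to exist alongside the REL_RETRY relationship used in the next example.

// Illustrative only: exercise the configured runner against a hypothetical Parquet file.
final Map<String, String> attributes = new HashMap<>();
attributes.put(CoreAttributes.PATH.key(), "/tmp/parquet-input");       // hypothetical directory
attributes.put(CoreAttributes.FILENAME.key(), "users.parquet");        // hypothetical file name
testRunner.enqueue("TRIGGER", attributes);
testRunner.run();
testRunner.assertAllFlowFilesTransferred(FetchParquet.REL_SUCCESS, 1); // assumed relationship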

Example 8 with RecordSetWriterFactory

Use of org.apache.nifi.serialization.RecordSetWriterFactory in the Apache NiFi project.

From the class FetchParquetTest, method testIOExceptionWhileWritingShouldRouteToRetry:

@Test
public void testIOExceptionWhileWritingShouldRouteToRetry() throws InitializationException, IOException, SchemaNotFoundException {
    configure(proc);
    final RecordSetWriter recordSetWriter = Mockito.mock(RecordSetWriter.class);
    when(recordSetWriter.write(any(Record.class))).thenThrow(new IOException("IOException"));
    final RecordSetWriterFactory recordSetWriterFactory = Mockito.mock(RecordSetWriterFactory.class);
    when(recordSetWriterFactory.getIdentifier()).thenReturn("mock-writer-factory");
    when(recordSetWriterFactory.createWriter(any(ComponentLog.class), any(RecordSchema.class), any(OutputStream.class))).thenReturn(recordSetWriter);
    testRunner.addControllerService("mock-writer-factory", recordSetWriterFactory);
    testRunner.enableControllerService(recordSetWriterFactory);
    testRunner.setProperty(FetchParquet.RECORD_WRITER, "mock-writer-factory");
    final File parquetDir = new File(DIRECTORY);
    final File parquetFile = new File(parquetDir, "testFetchParquetToCSV.parquet");
    final int numUsers = 10;
    writeParquetUsers(parquetFile, numUsers);
    final Map<String, String> attributes = new HashMap<>();
    attributes.put(CoreAttributes.PATH.key(), parquetDir.getAbsolutePath());
    attributes.put(CoreAttributes.FILENAME.key(), parquetFile.getName());
    testRunner.enqueue("TRIGGER", attributes);
    testRunner.run();
    testRunner.assertAllFlowFilesTransferred(FetchParquet.REL_RETRY, 1);
    final MockFlowFile flowFile = testRunner.getFlowFilesForRelationship(FetchParquet.REL_RETRY).get(0);
    flowFile.assertContentEquals("TRIGGER");
}
Also used: HashMap (java.util.HashMap), OutputStream (java.io.OutputStream), IOException (java.io.IOException), RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter), ComponentLog (org.apache.nifi.logging.ComponentLog), MockFlowFile (org.apache.nifi.util.MockFlowFile), RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory), Record (org.apache.nifi.serialization.record.Record), GenericRecord (org.apache.avro.generic.GenericRecord), RecordSchema (org.apache.nifi.serialization.record.RecordSchema), FlowFile (org.apache.nifi.flowfile.FlowFile), File (java.io.File), Test (org.junit.Test)
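
Two supplementary checks that could follow the assertions above, sketched here as assumptions rather than part of the original test: the retry outcome implies the mocked writer was invoked at least once, and the retried FlowFile is expected to still carry its incoming filename attribute.

// Illustrative only: the IOException came out of write(), so write() must have been called.
Mockito.verify(recordSetWriter, Mockito.atLeastOnce()).write(any(Record.class));
// The incoming attributes are expected to remain on the FlowFile routed to retry.
flowFile.assertAttributeEquals(CoreAttributes.FILENAME.key(), parquetFile.getName());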

Example 9 with RecordSetWriterFactory

Use of org.apache.nifi.serialization.RecordSetWriterFactory in the Apache NiFi project.

From the class ConsumeKafkaRecord_1_0, method createConsumerPool:

protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) {
    final int maxLeases = context.getMaxConcurrentTasks();
    final long maxUncommittedTime = context.getProperty(MAX_UNCOMMITTED_TIME).asTimePeriod(TimeUnit.MILLISECONDS);
    final Map<String, Object> props = new HashMap<>();
    KafkaProcessorUtils.buildCommonKafkaProperties(context, ConsumerConfig.class, props);
    props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
    final String topicListing = context.getProperty(ConsumeKafkaRecord_1_0.TOPICS).evaluateAttributeExpressions().getValue();
    final String topicType = context.getProperty(ConsumeKafkaRecord_1_0.TOPIC_TYPE).evaluateAttributeExpressions().getValue();
    final List<String> topics = new ArrayList<>();
    final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue();
    final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final boolean honorTransactions = context.getProperty(HONOR_TRANSACTIONS).asBoolean();
    final String charsetName = context.getProperty(MESSAGE_HEADER_ENCODING).evaluateAttributeExpressions().getValue();
    final Charset charset = Charset.forName(charsetName);
    final String headerNameRegex = context.getProperty(HEADER_NAME_REGEX).getValue();
    final Pattern headerNamePattern = headerNameRegex == null ? null : Pattern.compile(headerNameRegex);
    if (topicType.equals(TOPIC_NAME.getValue())) {
        for (final String topic : topicListing.split(",", 100)) {
            final String trimmedName = topic.trim();
            if (!trimmedName.isEmpty()) {
                topics.add(trimmedName);
            }
        }
        return new ConsumerPool(maxLeases, readerFactory, writerFactory, props, topics, maxUncommittedTime, securityProtocol, bootstrapServers, log, honorTransactions, charset, headerNamePattern);
    } else if (topicType.equals(TOPIC_PATTERN.getValue())) {
        final Pattern topicPattern = Pattern.compile(topicListing.trim());
        return new ConsumerPool(maxLeases, readerFactory, writerFactory, props, topicPattern, maxUncommittedTime, securityProtocol, bootstrapServers, log, honorTransactions, charset, headerNamePattern);
    } else {
        getLogger().error("Subscription type has an unknown value {}", new Object[] { topicType });
        return null;
    }
}
Also used: Pattern (java.util.regex.Pattern), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), Charset (java.nio.charset.Charset), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory), ByteArrayDeserializer (org.apache.kafka.common.serialization.ByteArrayDeserializer)
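
The TOPIC_NAME branch above tolerates whitespace and empty entries in the comma-separated topic property. A small standalone sketch of that split-and-trim behavior, using a hypothetical property value:

// Illustrative only: mirrors the split/trim/skip-empty handling in createConsumerPool.
final String topicListing = " orders, , audit-log ,metrics";   // hypothetical TOPICS value
final List<String> topics = new ArrayList<>();
for (final String topic : topicListing.split(",", 100)) {
    final String trimmedName = topic.trim();
    if (!trimmedName.isEmpty()) {
        topics.add(trimmedName);   // yields [orders, audit-log, metrics]
    }
}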

Example 10 with RecordSetWriterFactory

Use of org.apache.nifi.serialization.RecordSetWriterFactory in the Apache NiFi project.

From the class SplitRecord, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile original = session.get();
    if (original == null) {
        return;
    }
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final int maxRecords = context.getProperty(RECORDS_PER_SPLIT).evaluateAttributeExpressions(original).asInteger();
    final List<FlowFile> splits = new ArrayList<>();
    final Map<String, String> originalAttributes = original.getAttributes();
    try {
        session.read(original, new InputStreamCallback() {

            @Override
            public void process(final InputStream in) throws IOException {
                try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
                    final RecordSchema schema = writerFactory.getSchema(originalAttributes, reader.getSchema());
                    final RecordSet recordSet = reader.createRecordSet();
                    final PushBackRecordSet pushbackSet = new PushBackRecordSet(recordSet);
                    while (pushbackSet.isAnotherRecord()) {
                        FlowFile split = session.create(original);
                        try {
                            final Map<String, String> attributes = new HashMap<>();
                            final WriteResult writeResult;
                            try (final OutputStream out = session.write(split);
                                final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out)) {
                                if (maxRecords == 1) {
                                    final Record record = pushbackSet.next();
                                    writeResult = writer.write(record);
                                } else {
                                    final RecordSet limitedSet = pushbackSet.limit(maxRecords);
                                    writeResult = writer.write(limitedSet);
                                }
                                attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
                                attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
                                attributes.putAll(writeResult.getAttributes());
                                session.adjustCounter("Records Split", writeResult.getRecordCount(), false);
                            }
                            split = session.putAllAttributes(split, attributes);
                        } finally {
                            splits.add(split);
                        }
                    }
                } catch (final SchemaNotFoundException | MalformedRecordException e) {
                    throw new ProcessException("Failed to parse incoming data", e);
                }
            }
        });
    } catch (final ProcessException pe) {
        getLogger().error("Failed to split {}", new Object[] { original, pe });
        session.remove(splits);
        session.transfer(original, REL_FAILURE);
        return;
    }
    session.transfer(original, REL_ORIGINAL);
    session.transfer(splits, REL_SPLITS);
    getLogger().info("Successfully split {} into {} FlowFiles, each containing up to {} records", new Object[] { original, splits.size(), maxRecords });
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), InputStream (java.io.InputStream), RecordReader (org.apache.nifi.serialization.RecordReader), OutputStream (java.io.OutputStream), ArrayList (java.util.ArrayList), IOException (java.io.IOException), RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), ProcessException (org.apache.nifi.processor.exception.ProcessException), WriteResult (org.apache.nifi.serialization.WriteResult), RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), Record (org.apache.nifi.serialization.record.Record), PushBackRecordSet (org.apache.nifi.serialization.record.PushBackRecordSet), RecordSet (org.apache.nifi.serialization.record.RecordSet), RecordSchema (org.apache.nifi.serialization.record.RecordSchema), HashMap (java.util.HashMap), Map (java.util.Map)
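
A hedged sketch of how SplitRecord might be exercised end to end with NiFi's mock record services. The MockRecordParser/MockRecordWriter services and the record values are assumptions (MockRecordWriter appears in Example 7 and a MockRecordParser in Example 6); the property and relationship constants come from the code above. The snippet belongs in a test method that declares InitializationException.

// Illustrative only: split two records into two one-record FlowFiles.
final TestRunner runner = TestRunners.newTestRunner(SplitRecord.class);
final MockRecordParser readerService = new MockRecordParser();   // assumed mock RecordReaderFactory
readerService.addSchemaField("name", RecordFieldType.STRING);
readerService.addRecord("John Doe");                             // assumed addRecord(...) support
readerService.addRecord("Jane Doe");
final MockRecordWriter writerService = new MockRecordWriter("name", false);
runner.addControllerService("reader", readerService);
runner.enableControllerService(readerService);
runner.addControllerService("writer", writerService);
runner.enableControllerService(writerService);
runner.setProperty(SplitRecord.RECORD_READER, "reader");
runner.setProperty(SplitRecord.RECORD_WRITER, "writer");
runner.setProperty(SplitRecord.RECORDS_PER_SPLIT, "1");
runner.enqueue("");
runner.run();
runner.assertTransferCount(SplitRecord.REL_SPLITS, 2);
runner.assertTransferCount(SplitRecord.REL_ORIGINAL, 1);
// Each split should report the "record.count" attribute set in onTrigger above.
runner.getFlowFilesForRelationship(SplitRecord.REL_SPLITS)
      .forEach(ff -> ff.assertAttributeEquals("record.count", "1"));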

Aggregations

RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory): 29 usages
FlowFile (org.apache.nifi.flowfile.FlowFile): 19 usages
RecordSchema (org.apache.nifi.serialization.record.RecordSchema): 19 usages
IOException (java.io.IOException): 17 usages
RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory): 15 usages
RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter): 14 usages
HashMap (java.util.HashMap): 13 usages
ProcessException (org.apache.nifi.processor.exception.ProcessException): 13 usages
RecordReader (org.apache.nifi.serialization.RecordReader): 13 usages
OutputStream (java.io.OutputStream): 12 usages
Record (org.apache.nifi.serialization.record.Record): 12 usages
InputStream (java.io.InputStream): 11 usages
SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException): 10 usages
RecordSet (org.apache.nifi.serialization.record.RecordSet): 9 usages
ArrayList (java.util.ArrayList): 7 usages
Map (java.util.Map): 7 usages
ComponentLog (org.apache.nifi.logging.ComponentLog): 7 usages
ProcessContext (org.apache.nifi.processor.ProcessContext): 7 usages
MockRecordParser (org.apache.nifi.processors.kafka.pubsub.util.MockRecordParser): 7 usages
WriteResult (org.apache.nifi.serialization.WriteResult): 7 usages