Example 21 with RecordReaderFactory

Use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.

From class TestPutKudu, method testInvalidReaderShouldRouteToFailure.

@Test
public void testInvalidReaderShouldRouteToFailure() throws InitializationException, SchemaNotFoundException, MalformedRecordException, IOException {
    // Test-class helper that registers a default mock reader; replaced by the failing factory below
    createRecordReader(0);
    // Simulate the factory throwing an IOException when it creates a reader, which is
    // what happens when invalid Avro is passed to the Avro reader factory.
    final RecordReaderFactory readerFactory = Mockito.mock(RecordReaderFactory.class);
    when(readerFactory.getIdentifier()).thenReturn("mock-reader-factory");
    when(readerFactory.createRecordReader(any(FlowFile.class), any(InputStream.class), any(ComponentLog.class))).thenThrow(new IOException("NOT AVRO"));
    testRunner.addControllerService("mock-reader-factory", readerFactory);
    testRunner.enableControllerService(readerFactory);
    testRunner.setProperty(PutKudu.RECORD_READER, "mock-reader-factory");
    final String filename = "testInvalidAvroShouldRouteToFailure-" + System.currentTimeMillis();
    final Map<String, String> flowFileAttributes = new HashMap<>();
    flowFileAttributes.put(CoreAttributes.FILENAME.key(), filename);
    testRunner.enqueue("trigger", flowFileAttributes);
    testRunner.run();
    testRunner.assertAllFlowFilesTransferred(PutKudu.REL_FAILURE, 1);
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile) MockFlowFile (org.apache.nifi.util.MockFlowFile) HashMap (java.util.HashMap) InputStream (java.io.InputStream) IOException (java.io.IOException) ComponentLog (org.apache.nifi.logging.ComponentLog) RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory) Test (org.junit.Test)
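
For contrast, here is a happy-path sketch that is not part of the NiFi sources: the MockRecordParser test utility (from nifi-mock-record-utils) implements RecordReaderFactory, so the same runner wiring can verify that readable records route to success. The field names, values, and expected relationship below are assumptions modeled on the test above.

@Test
public void testValidReaderShouldRouteToSuccess() throws InitializationException {
    // Hypothetical counterpart to the failure test; MockRecordParser yields the records added here
    final MockRecordParser readerFactory = new MockRecordParser();
    readerFactory.addSchemaField("id", RecordFieldType.INT);
    readerFactory.addSchemaField("name", RecordFieldType.STRING);
    readerFactory.addRecord(1, "alice");
    testRunner.addControllerService("mock-reader-factory", readerFactory);
    testRunner.enableControllerService(readerFactory);
    testRunner.setProperty(PutKudu.RECORD_READER, "mock-reader-factory");
    testRunner.enqueue("trigger");
    testRunner.run();
    testRunner.assertAllFlowFilesTransferred(PutKudu.REL_SUCCESS, 1);
}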

Example 22 with RecordReaderFactory

Use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.

From class PutMongoRecord, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordReaderFactory recordParserFactory = context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);
    final WriteConcern writeConcern = getWriteConcern(context);
    final MongoCollection<Document> collection = getCollection(context).withWriteConcern(writeConcern);
    List<Document> inserts = new ArrayList<>();
    int ceiling = context.getProperty(INSERT_COUNT).asInteger();
    int added = 0;
    boolean error = false;
    try (final InputStream inStream = session.read(flowFile);
        final RecordReader reader = recordParserFactory.createRecordReader(flowFile, inStream, getLogger())) {
        RecordSchema schema = reader.getSchema();
        Record record;
        while ((record = reader.nextRecord()) != null) {
            // Convert the Record to a Map and copy each schema field into a Mongo Document
            Map<String, Object> contentMap = (Map<String, Object>) DataTypeUtils.convertRecordFieldtoObject(record, RecordFieldType.RECORD.getRecordDataType(record.getSchema()));
            Document document = new Document();
            for (String name : schema.getFieldNames()) {
                document.put(name, contentMap.get(name));
            }
            inserts.add(document);
            if (inserts.size() == ceiling) {
                collection.insertMany(inserts);
                added += inserts.size();
                inserts = new ArrayList<>();
            }
        }
        // Flush any remaining buffered documents
        if (inserts.size() > 0) {
            collection.insertMany(inserts);
        }
    } catch (SchemaNotFoundException | IOException | MalformedRecordException e) {
        getLogger().error("PutMongoRecord failed with error:", e);
        session.transfer(flowFile, REL_FAILURE);
        error = true;
    } finally {
        if (!error) {
            session.getProvenanceReporter().send(flowFile, context.getProperty(URI).evaluateAttributeExpressions().getValue(), String.format("Added %d documents to MongoDB.", added));
            session.transfer(flowFile, REL_SUCCESS);
            getLogger().info("Inserted {} records into MongoDB", new Object[] { added });
        }
    }
    session.commit();
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile) InputStream (java.io.InputStream) RecordReader (org.apache.nifi.serialization.RecordReader) ArrayList (java.util.ArrayList) IOException (java.io.IOException) Document (org.bson.Document) RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory) MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException) WriteConcern (com.mongodb.WriteConcern) Record (org.apache.nifi.serialization.record.Record) SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException) RecordSchema (org.apache.nifi.serialization.record.RecordSchema) Map (java.util.Map)
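
The conversion step is the reusable part of this example: DataTypeUtils.convertRecordFieldtoObject flattens a Record into a Map keyed by field name, and the reader's schema decides which entries land in the Document. A minimal sketch of that step as a standalone helper (the helper name is ours; the calls are the ones used above):

private static Document toDocument(final Record record, final RecordSchema schema) {
    // convertRecordFieldtoObject returns a Map for RECORD-typed values, hence the cast
    @SuppressWarnings("unchecked")
    final Map<String, Object> contentMap = (Map<String, Object>) DataTypeUtils.convertRecordFieldtoObject(
            record, RecordFieldType.RECORD.getRecordDataType(record.getSchema()));
    final Document document = new Document();
    for (final String name : schema.getFieldNames()) {
        document.put(name, contentMap.get(name));
    }
    return document;
}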

Example 23 with RecordReaderFactory

Use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.

From class ConsumeKafkaRecord_0_10, method createConsumerPool.

protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) {
    final int maxLeases = context.getMaxConcurrentTasks();
    final long maxUncommittedTime = context.getProperty(MAX_UNCOMMITTED_TIME).asTimePeriod(TimeUnit.MILLISECONDS);
    final Map<String, Object> props = new HashMap<>();
    KafkaProcessorUtils.buildCommonKafkaProperties(context, ConsumerConfig.class, props);
    props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
    final String topicListing = context.getProperty(ConsumeKafkaRecord_0_10.TOPICS).evaluateAttributeExpressions().getValue();
    final String topicType = context.getProperty(ConsumeKafkaRecord_0_10.TOPIC_TYPE).evaluateAttributeExpressions().getValue();
    final List<String> topics = new ArrayList<>();
    final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue();
    final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    if (topicType.equals(TOPIC_NAME.getValue())) {
        for (final String topic : topicListing.split(",", 100)) {
            final String trimmedName = topic.trim();
            if (!trimmedName.isEmpty()) {
                topics.add(trimmedName);
            }
        }
        return new ConsumerPool(maxLeases, readerFactory, writerFactory, props, topics, maxUncommittedTime, securityProtocol, bootstrapServers, log);
    } else if (topicType.equals(TOPIC_PATTERN.getValue())) {
        final Pattern topicPattern = Pattern.compile(topicListing.trim());
        return new ConsumerPool(maxLeases, readerFactory, writerFactory, props, topicPattern, maxUncommittedTime, securityProtocol, bootstrapServers, log);
    } else {
        getLogger().error("Subscription type has an unknown value {}", new Object[] { topicType });
        return null;
    }
}
Also used: Pattern (java.util.regex.Pattern) HashMap (java.util.HashMap) ArrayList (java.util.ArrayList) RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory) RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory) ByteArrayDeserializer (org.apache.kafka.common.serialization.ByteArrayDeserializer)
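
Topic handling is the only branch point in this method: a comma-separated name list is split and trimmed, while a pattern subscription compiles the raw property value. A standalone sketch of the name-list case (the helper name is ours; the 100-element split limit mirrors the processor):

static List<String> parseTopicNames(final String topicListing) {
    final List<String> topics = new ArrayList<>();
    // Split on commas (at most 100 entries), trim whitespace, and skip empties such as the gap in "a,,b"
    for (final String topic : topicListing.split(",", 100)) {
        final String trimmedName = topic.trim();
        if (!trimmedName.isEmpty()) {
            topics.add(trimmedName);
        }
    }
    return topics;
}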

Example 24 with RecordReaderFactory

Use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.

From class ConsumeKafkaRecord_0_11, method createConsumerPool.

protected ConsumerPool createConsumerPool(final ProcessContext context, final ComponentLog log) {
    final int maxLeases = context.getMaxConcurrentTasks();
    final long maxUncommittedTime = context.getProperty(MAX_UNCOMMITTED_TIME).asTimePeriod(TimeUnit.MILLISECONDS);
    final Map<String, Object> props = new HashMap<>();
    KafkaProcessorUtils.buildCommonKafkaProperties(context, ConsumerConfig.class, props);
    props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
    final String topicListing = context.getProperty(ConsumeKafkaRecord_0_11.TOPICS).evaluateAttributeExpressions().getValue();
    final String topicType = context.getProperty(ConsumeKafkaRecord_0_11.TOPIC_TYPE).evaluateAttributeExpressions().getValue();
    final List<String> topics = new ArrayList<>();
    final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue();
    final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final boolean honorTransactions = context.getProperty(HONOR_TRANSACTIONS).asBoolean();
    final String charsetName = context.getProperty(MESSAGE_HEADER_ENCODING).evaluateAttributeExpressions().getValue();
    final Charset charset = Charset.forName(charsetName);
    final String headerNameRegex = context.getProperty(HEADER_NAME_REGEX).getValue();
    final Pattern headerNamePattern = headerNameRegex == null ? null : Pattern.compile(headerNameRegex);
    if (topicType.equals(TOPIC_NAME.getValue())) {
        for (final String topic : topicListing.split(",", 100)) {
            final String trimmedName = topic.trim();
            if (!trimmedName.isEmpty()) {
                topics.add(trimmedName);
            }
        }
        return new ConsumerPool(maxLeases, readerFactory, writerFactory, props, topics, maxUncommittedTime, securityProtocol, bootstrapServers, log, honorTransactions, charset, headerNamePattern);
    } else if (topicType.equals(TOPIC_PATTERN.getValue())) {
        final Pattern topicPattern = Pattern.compile(topicListing.trim());
        return new ConsumerPool(maxLeases, readerFactory, writerFactory, props, topicPattern, maxUncommittedTime, securityProtocol, bootstrapServers, log, honorTransactions, charset, headerNamePattern);
    } else {
        getLogger().error("Subscription type has an unknown value {}", new Object[] { topicType });
        return null;
    }
}
Also used: Pattern (java.util.regex.Pattern) HashMap (java.util.HashMap) ArrayList (java.util.ArrayList) Charset (java.nio.charset.Charset) RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory) RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory) ByteArrayDeserializer (org.apache.kafka.common.serialization.ByteArrayDeserializer)

Example 25 with RecordReaderFactory

Use of org.apache.nifi.serialization.RecordReaderFactory in project nifi by apache.

From class PublishKafkaRecord_0_11, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final List<FlowFile> flowFiles = session.get(FlowFileFilters.newSizeBasedFilter(1, DataUnit.MB, 500));
    if (flowFiles.isEmpty()) {
        return;
    }
    final PublisherPool pool = getPublisherPool(context);
    if (pool == null) {
        context.yield();
        return;
    }
    final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue();
    final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final boolean useTransactions = context.getProperty(USE_TRANSACTIONS).asBoolean();
    final long startTime = System.nanoTime();
    try (final PublisherLease lease = pool.obtainPublisher()) {
        if (useTransactions) {
            lease.beginTransaction();
        }
        // Send each FlowFile to Kafka asynchronously.
        final Iterator<FlowFile> itr = flowFiles.iterator();
        while (itr.hasNext()) {
            final FlowFile flowFile = itr.next();
            if (!isScheduled()) {
                // If stopped, re-queue FlowFile instead of sending it
                if (useTransactions) {
                    session.rollback();
                    lease.rollback();
                    return;
                }
                session.transfer(flowFile);
                itr.remove();
                continue;
            }
            final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue();
            final String messageKeyField = context.getProperty(MESSAGE_KEY_FIELD).evaluateAttributeExpressions(flowFile).getValue();
            try {
                session.read(flowFile, new InputStreamCallback() {

                    @Override
                    public void process(final InputStream rawIn) throws IOException {
                        // Close both the buffered stream and the record reader when the block exits
                        try (final InputStream in = new BufferedInputStream(rawIn);
                            final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger())) {
                            final RecordSet recordSet = reader.createRecordSet();
                            final RecordSchema schema = writerFactory.getSchema(flowFile.getAttributes(), recordSet.getSchema());
                            lease.publish(flowFile, recordSet, writerFactory, schema, messageKeyField, topic);
                        } catch (final SchemaNotFoundException | MalformedRecordException e) {
                            throw new ProcessException(e);
                        }
                    }
                });
            } catch (final Exception e) {
                // Register the failure with the lease; it is reflected in the PublishResult when the lease is completed below
                lease.fail(flowFile, e);
                continue;
            }
        }
        // Complete the send
        final PublishResult publishResult = lease.complete();
        if (publishResult.isFailure()) {
            getLogger().info("Failed to send FlowFile to kafka; transferring to failure");
            session.transfer(flowFiles, REL_FAILURE);
            return;
        }
        // Transfer any successful FlowFiles.
        final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime);
        for (FlowFile success : flowFiles) {
            final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(success).getValue();
            final int msgCount = publishResult.getSuccessfulMessageCount(success);
            success = session.putAttribute(success, MSG_COUNT, String.valueOf(msgCount));
            session.adjustCounter("Messages Sent", msgCount, true);
            final String transitUri = KafkaProcessorUtils.buildTransitURI(securityProtocol, bootstrapServers, topic);
            session.getProvenanceReporter().send(success, transitUri, "Sent " + msgCount + " messages", transmissionMillis);
            session.transfer(success, REL_SUCCESS);
        }
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile) BufferedInputStream (java.io.BufferedInputStream) InputStream (java.io.InputStream) RecordReader (org.apache.nifi.serialization.RecordReader) IOException (java.io.IOException) SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException) ProcessException (org.apache.nifi.processor.exception.ProcessException) MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException) RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory) RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory) InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback) RecordSet (org.apache.nifi.serialization.record.RecordSet) RecordSchema (org.apache.nifi.serialization.record.RecordSchema)
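
The callback's core pattern, a reader obtained from the factory and its RecordSet handed to a writer, also works outside a Kafka processor. A minimal sketch under the same era's API (the helper name is ours; later NiFi versions pass extra arguments to createWriter):

static void copyRecords(final RecordReaderFactory readerFactory, final RecordSetWriterFactory writerFactory,
        final FlowFile flowFile, final InputStream rawIn, final OutputStream out, final ComponentLog logger)
        throws IOException, SchemaNotFoundException, MalformedRecordException {
    try (final InputStream in = new BufferedInputStream(rawIn);
        final RecordReader reader = readerFactory.createRecordReader(flowFile, in, logger)) {
        final RecordSet recordSet = reader.createRecordSet();
        final RecordSchema schema = writerFactory.getSchema(flowFile.getAttributes(), recordSet.getSchema());
        try (final RecordSetWriter writer = writerFactory.createWriter(logger, schema, out)) {
            // write(RecordSet) drains the reader and reports how many records were written
            final WriteResult result = writer.write(recordSet);
            logger.info("Wrote {} records", new Object[] { result.getRecordCount() });
        }
    }
}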

Aggregations

RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory): 30 usages
IOException (java.io.IOException): 22 usages
InputStream (java.io.InputStream): 22 usages
FlowFile (org.apache.nifi.flowfile.FlowFile): 21 usages
RecordReader (org.apache.nifi.serialization.RecordReader): 21 usages
HashMap (java.util.HashMap): 17 usages
RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory): 16 usages
ProcessException (org.apache.nifi.processor.exception.ProcessException): 15 usages
SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException): 14 usages
MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException): 14 usages
RecordSchema (org.apache.nifi.serialization.record.RecordSchema): 13 usages
Record (org.apache.nifi.serialization.record.Record): 12 usages
Map (java.util.Map): 10 usages
RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter): 9 usages
OutputStream (java.io.OutputStream): 8 usages
ArrayList (java.util.ArrayList): 8 usages
ComponentLog (org.apache.nifi.logging.ComponentLog): 8 usages
WriteResult (org.apache.nifi.serialization.WriteResult): 7 usages
RecordSet (org.apache.nifi.serialization.record.RecordSet): 7 usages
MockFlowFile (org.apache.nifi.util.MockFlowFile): 6 usages