
Example 21 with RecordReader

Use of org.apache.nifi.serialization.RecordReader in the Apache NiFi project (nifi by apache).

From the class TestPutKudu, the method testMalformedRecordExceptionFromReaderShouldRouteToFailure:

@Test
public void testMalformedRecordExceptionFromReaderShouldRouteToFailure() throws InitializationException, IOException, MalformedRecordException, SchemaNotFoundException {
    createRecordReader(10);
    final RecordReader recordReader = Mockito.mock(RecordReader.class);
    when(recordReader.nextRecord()).thenThrow(new MalformedRecordException("ERROR"));
    final RecordReaderFactory readerFactory = Mockito.mock(RecordReaderFactory.class);
    when(readerFactory.getIdentifier()).thenReturn("mock-reader-factory");
    when(readerFactory.createRecordReader(any(FlowFile.class), any(InputStream.class), any(ComponentLog.class))).thenReturn(recordReader);
    testRunner.addControllerService("mock-reader-factory", readerFactory);
    testRunner.enableControllerService(readerFactory);
    testRunner.setProperty(PutKudu.RECORD_READER, "mock-reader-factory");
    final String filename = "testMalformedRecordExceptionShouldRouteToFailure-" + System.currentTimeMillis();
    final Map<String, String> flowFileAttributes = new HashMap<>();
    flowFileAttributes.put(CoreAttributes.FILENAME.key(), filename);
    testRunner.enqueue("trigger", flowFileAttributes);
    testRunner.run();
    testRunner.assertAllFlowFilesTransferred(PutKudu.REL_FAILURE, 1);
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), MockFlowFile (org.apache.nifi.util.MockFlowFile), HashMap (java.util.HashMap), InputStream (java.io.InputStream), RecordReader (org.apache.nifi.serialization.RecordReader), ComponentLog (org.apache.nifi.logging.ComponentLog), MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), Test (org.junit.Test)
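
The failure is injected purely through Mockito: the factory hands back a reader whose nextRecord() throws, so PutKudu's error path is exercised without any real input data. As a hedged companion sketch (not part of TestPutKudu), the same wiring can drive the happy path instead, with the mocked reader yielding two records and then end-of-stream:

// Hypothetical variant: the mocked reader returns two records and then null
// (end of stream) instead of throwing; both Record instances are plain mocks.
final Record firstRecord = Mockito.mock(Record.class);
final Record secondRecord = Mockito.mock(Record.class);

final RecordReader happyReader = Mockito.mock(RecordReader.class);
when(happyReader.nextRecord()).thenReturn(firstRecord, secondRecord, null);

final RecordReaderFactory happyFactory = Mockito.mock(RecordReaderFactory.class);
when(happyFactory.getIdentifier()).thenReturn("mock-reader-factory");
when(happyFactory.createRecordReader(any(FlowFile.class), any(InputStream.class), any(ComponentLog.class)))
        .thenReturn(happyReader);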

Example 22 with RecordReader

Use of org.apache.nifi.serialization.RecordReader in the Apache NiFi project (nifi by apache).

From the class PutMongoRecord, the method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordReaderFactory recordParserFactory = context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);
    final WriteConcern writeConcern = getWriteConcern(context);
    final MongoCollection<Document> collection = getCollection(context).withWriteConcern(writeConcern);
    List<Document> inserts = new ArrayList<>();
    int ceiling = context.getProperty(INSERT_COUNT).asInteger();
    int added = 0;
    boolean error = false;
    try (final InputStream inStream = session.read(flowFile);
        final RecordReader reader = recordParserFactory.createRecordReader(flowFile, inStream, getLogger())) {
        RecordSchema schema = reader.getSchema();
        Record record;
        while ((record = reader.nextRecord()) != null) {
            // Convert each Record to HashMap and put into the Mongo document
            Map<String, Object> contentMap = (Map<String, Object>) DataTypeUtils.convertRecordFieldtoObject(record, RecordFieldType.RECORD.getRecordDataType(record.getSchema()));
            Document document = new Document();
            for (String name : schema.getFieldNames()) {
                document.put(name, contentMap.get(name));
            }
            inserts.add(document);
            if (inserts.size() == ceiling) {
                collection.insertMany(inserts);
                added += inserts.size();
                inserts = new ArrayList<>();
            }
        }
        if (inserts.size() > 0) {
            collection.insertMany(inserts);
        }
    } catch (SchemaNotFoundException | IOException | MalformedRecordException e) {
        getLogger().error("PutMongoRecord failed with error:", e);
        session.transfer(flowFile, REL_FAILURE);
        error = true;
    } finally {
        if (!error) {
            session.getProvenanceReporter().send(flowFile, context.getProperty(URI).evaluateAttributeExpressions().getValue(), String.format("Added %d documents to MongoDB.", added));
            session.transfer(flowFile, REL_SUCCESS);
            getLogger().info("Inserted {} records into MongoDB", new Object[] { added });
        }
    }
    session.commit();
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), InputStream (java.io.InputStream), RecordReader (org.apache.nifi.serialization.RecordReader), ArrayList (java.util.ArrayList), IOException (java.io.IOException), Document (org.bson.Document), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException), WriteConcern (com.mongodb.WriteConcern), Record (org.apache.nifi.serialization.record.Record), SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException), RecordSchema (org.apache.nifi.serialization.record.RecordSchema), Map (java.util.Map)
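
The insert logic above batches writes: it flushes to insertMany whenever the buffer reaches the configured Insert Count and flushes any remainder once the reader is exhausted. A minimal generic sketch of that batching pattern follows (an illustrative helper, not part of PutMongoRecord); in PutMongoRecord the items come from reader.nextRecord() and the flush step is collection.insertMany(batch).

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;

final class BatchingSketch {
    // Flush a buffer every batchSize items and once more at the end, mirroring
    // the insertMany batching in the onTrigger method above.
    static <T> void processInBatches(final Iterator<T> items, final int batchSize, final Consumer<List<T>> flush) {
        List<T> buffer = new ArrayList<>();
        while (items.hasNext()) {
            buffer.add(items.next());
            if (buffer.size() == batchSize) {
                flush.accept(buffer);
                buffer = new ArrayList<>();
            }
        }
        if (!buffer.isEmpty()) {
            flush.accept(buffer);
        }
    }
}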

Example 23 with RecordReader

Use of org.apache.nifi.serialization.RecordReader in the Apache NiFi project (nifi by apache).

From the class ConsumerLease, the method writeRecordData:

private void writeRecordData(final ProcessSession session, final List<ConsumerRecord<byte[], byte[]>> records, final TopicPartition topicPartition) {
    // In order to obtain a RecordReader from the RecordReaderFactory, we need to give it a FlowFile.
    // We don't want to create a new FlowFile for each record that we receive, so we will just create
    // a "temporary flowfile" that will be removed in the finally block below and use that to pass to
    // the createRecordReader method.
    RecordSetWriter writer = null;
    try {
        for (final ConsumerRecord<byte[], byte[]> consumerRecord : records) {
            final Map<String, String> attributes = getAttributes(consumerRecord);
            final byte[] recordBytes = consumerRecord.value() == null ? new byte[0] : consumerRecord.value();
            try (final InputStream in = new ByteArrayInputStream(recordBytes)) {
                final RecordReader reader;
                try {
                    reader = readerFactory.createRecordReader(attributes, in, logger);
                } catch (final IOException e) {
                    yield();
                    rollback(topicPartition);
                    handleParseFailure(consumerRecord, session, e, "Failed to parse message from Kafka due to comms failure. Will roll back session and try again momentarily.");
                    closeWriter(writer);
                    return;
                } catch (final Exception e) {
                    handleParseFailure(consumerRecord, session, e);
                    continue;
                }
                Record record;
                while ((record = reader.nextRecord()) != null) {
                    // Determine the bundle for this record.
                    final RecordSchema recordSchema = record.getSchema();
                    final BundleInformation bundleInfo = new BundleInformation(topicPartition, recordSchema, attributes);
                    BundleTracker tracker = bundleMap.get(bundleInfo);
                    if (tracker == null) {
                        FlowFile flowFile = session.create();
                        flowFile = session.putAllAttributes(flowFile, attributes);
                        final OutputStream rawOut = session.write(flowFile);
                        final RecordSchema writeSchema;
                        try {
                            writeSchema = writerFactory.getSchema(flowFile.getAttributes(), recordSchema);
                        } catch (final Exception e) {
                            logger.error("Failed to obtain Schema for FlowFile. Will roll back the Kafka message offsets.", e);
                            rollback(topicPartition);
                            yield();
                            throw new ProcessException(e);
                        }
                        writer = writerFactory.createWriter(logger, writeSchema, rawOut);
                        writer.beginRecordSet();
                        tracker = new BundleTracker(consumerRecord, topicPartition, keyEncoding, writer);
                        tracker.updateFlowFile(flowFile);
                        bundleMap.put(bundleInfo, tracker);
                    } else {
                        writer = tracker.recordWriter;
                    }
                    try {
                        writer.write(record);
                    } catch (final RuntimeException re) {
                        handleParseFailure(consumerRecord, session, re, "Failed to write message from Kafka using the configured Record Writer. " + "Will route message as its own FlowFile to the 'parse.failure' relationship");
                        continue;
                    }
                    tracker.incrementRecordCount(1L);
                    session.adjustCounter("Records Received", 1L, false);
                }
            }
        }
    } catch (final Exception e) {
        logger.error("Failed to properly receive messages from Kafka. Will roll back session and any un-committed offsets from Kafka.", e);
        closeWriter(writer);
        rollback(topicPartition);
        throw new ProcessException(e);
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), ByteArrayInputStream (java.io.ByteArrayInputStream), InputStream (java.io.InputStream), RecordReader (org.apache.nifi.serialization.RecordReader), OutputStream (java.io.OutputStream), IOException (java.io.IOException), RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter), KafkaException (org.apache.kafka.common.KafkaException), ProcessException (org.apache.nifi.processor.exception.ProcessException), Record (org.apache.nifi.serialization.record.Record), ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord), RecordSchema (org.apache.nifi.serialization.record.RecordSchema)
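
The core idea in writeRecordData is bundling: records are grouped by topic partition, record schema, and FlowFile attributes, and a FlowFile plus RecordSetWriter is created lazily the first time a given combination is seen. A standalone sketch of that lazy-grouping pattern follows; the types and the keyFor helper are illustrative stand-ins, not ConsumerLease internals.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

final class BundleSketch {
    // Group incoming items by a composite key, creating the per-key bundle only
    // on first use: the same shape as the bundleMap lookup above.
    static Map<String, List<String>> bundle(final List<String> records) {
        final Map<String, List<String>> bundles = new HashMap<>();
        for (final String record : records) {
            final String key = keyFor(record); // stand-in for (partition, schema, attributes)
            bundles.computeIfAbsent(key, k -> new ArrayList<>()).add(record);
        }
        return bundles;
    }

    private static String keyFor(final String record) {
        // Illustrative key derivation; ConsumerLease builds a BundleInformation instead.
        return record.isEmpty() ? "empty" : record.substring(0, 1);
    }
}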

Example 24 with RecordReader

Use of org.apache.nifi.serialization.RecordReader in the Apache NiFi project (nifi by apache).

From the class PublishKafkaRecord_0_11, the method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final List<FlowFile> flowFiles = session.get(FlowFileFilters.newSizeBasedFilter(1, DataUnit.MB, 500));
    if (flowFiles.isEmpty()) {
        return;
    }
    final PublisherPool pool = getPublisherPool(context);
    if (pool == null) {
        context.yield();
        return;
    }
    final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue();
    final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final boolean useTransactions = context.getProperty(USE_TRANSACTIONS).asBoolean();
    final long startTime = System.nanoTime();
    try (final PublisherLease lease = pool.obtainPublisher()) {
        if (useTransactions) {
            lease.beginTransaction();
        }
        // Send each FlowFile to Kafka asynchronously.
        final Iterator<FlowFile> itr = flowFiles.iterator();
        while (itr.hasNext()) {
            final FlowFile flowFile = itr.next();
            if (!isScheduled()) {
                // If stopped, re-queue FlowFile instead of sending it
                if (useTransactions) {
                    session.rollback();
                    lease.rollback();
                    return;
                }
                session.transfer(flowFile);
                itr.remove();
                continue;
            }
            final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue();
            final String messageKeyField = context.getProperty(MESSAGE_KEY_FIELD).evaluateAttributeExpressions(flowFile).getValue();
            try {
                session.read(flowFile, new InputStreamCallback() {

                    @Override
                    public void process(final InputStream rawIn) throws IOException {
                        try (final InputStream in = new BufferedInputStream(rawIn)) {
                            final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger());
                            final RecordSet recordSet = reader.createRecordSet();
                            final RecordSchema schema = writerFactory.getSchema(flowFile.getAttributes(), recordSet.getSchema());
                            lease.publish(flowFile, recordSet, writerFactory, schema, messageKeyField, topic);
                        } catch (final SchemaNotFoundException | MalformedRecordException e) {
                            throw new ProcessException(e);
                        }
                    }
                });
            } catch (final Exception e) {
                // The FlowFile will be obtained and the error logged below, when calling publishResult.getFailedFlowFiles()
                lease.fail(flowFile, e);
                continue;
            }
        }
        // Complete the send
        final PublishResult publishResult = lease.complete();
        if (publishResult.isFailure()) {
            getLogger().info("Failed to send FlowFile to kafka; transferring to failure");
            session.transfer(flowFiles, REL_FAILURE);
            return;
        }
        // Transfer any successful FlowFiles.
        final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime);
        for (FlowFile success : flowFiles) {
            final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(success).getValue();
            final int msgCount = publishResult.getSuccessfulMessageCount(success);
            success = session.putAttribute(success, MSG_COUNT, String.valueOf(msgCount));
            session.adjustCounter("Messages Sent", msgCount, true);
            final String transitUri = KafkaProcessorUtils.buildTransitURI(securityProtocol, bootstrapServers, topic);
            session.getProvenanceReporter().send(success, transitUri, "Sent " + msgCount + " messages", transmissionMillis);
            session.transfer(success, REL_SUCCESS);
        }
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), BufferedInputStream (java.io.BufferedInputStream), InputStream (java.io.InputStream), RecordReader (org.apache.nifi.serialization.RecordReader), IOException (java.io.IOException), SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException), ProcessException (org.apache.nifi.processor.exception.ProcessException), MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), RecordSet (org.apache.nifi.serialization.record.RecordSet), RecordSchema (org.apache.nifi.serialization.record.RecordSchema)
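
Since InputStreamCallback declares a single process method, the anonymous class above can also be written as a lambda. The sketch below is a hedged restatement of the same read-and-publish step, assuming the surrounding readerFactory, writerFactory, lease, messageKeyField, and topic variables from onTrigger; it is not a separate code path in PublishKafkaRecord_0_11.

// Same behavior as the session.read(...) block above, restructured as a lambda.
session.read(flowFile, rawIn -> {
    try (final InputStream in = new BufferedInputStream(rawIn)) {
        final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger());
        final RecordSet recordSet = reader.createRecordSet();
        final RecordSchema schema = writerFactory.getSchema(flowFile.getAttributes(), recordSet.getSchema());
        lease.publish(flowFile, recordSet, writerFactory, schema, messageKeyField, topic);
    } catch (final SchemaNotFoundException | MalformedRecordException e) {
        throw new ProcessException(e);
    }
});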

Example 25 with RecordReader

Use of org.apache.nifi.serialization.RecordReader in the Apache NiFi project (nifi by apache).

From the class QueryRecord, the method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }
    final StopWatch stopWatch = new StopWatch(true);
    final RecordSetWriterFactory recordSetWriterFactory = context.getProperty(RECORD_WRITER_FACTORY).asControllerService(RecordSetWriterFactory.class);
    final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);
    final Map<FlowFile, Relationship> transformedFlowFiles = new HashMap<>();
    final Set<FlowFile> createdFlowFiles = new HashSet<>();
    // Determine the Record Reader's schema
    final RecordSchema readerSchema;
    try (final InputStream rawIn = session.read(original)) {
        final Map<String, String> originalAttributes = original.getAttributes();
        final RecordReader reader = recordReaderFactory.createRecordReader(originalAttributes, rawIn, getLogger());
        final RecordSchema inputSchema = reader.getSchema();
        readerSchema = recordSetWriterFactory.getSchema(originalAttributes, inputSchema);
    } catch (final Exception e) {
        getLogger().error("Failed to determine Record Schema from {}; routing to failure", new Object[] { original, e });
        session.transfer(original, REL_FAILURE);
        return;
    }
    // Determine the schema for writing the data
    final Map<String, String> originalAttributes = original.getAttributes();
    int recordsRead = 0;
    try {
        for (final PropertyDescriptor descriptor : context.getProperties().keySet()) {
            if (!descriptor.isDynamic()) {
                continue;
            }
            final Relationship relationship = new Relationship.Builder().name(descriptor.getName()).build();
            // We have to fork a child because we may need to read the input FlowFile more than once,
            // and we cannot call session.read() on the original FlowFile while we are within a write
            // callback for the original FlowFile.
            FlowFile transformed = session.create(original);
            boolean flowFileRemoved = false;
            try {
                final String sql = context.getProperty(descriptor).evaluateAttributeExpressions(original).getValue();
                final AtomicReference<WriteResult> writeResultRef = new AtomicReference<>();
                final QueryResult queryResult;
                if (context.getProperty(CACHE_SCHEMA).asBoolean()) {
                    queryResult = queryWithCache(session, original, sql, context, recordReaderFactory);
                } else {
                    queryResult = query(session, original, sql, context, recordReaderFactory);
                }
                final AtomicReference<String> mimeTypeRef = new AtomicReference<>();
                try {
                    final ResultSet rs = queryResult.getResultSet();
                    transformed = session.write(transformed, new OutputStreamCallback() {

                        @Override
                        public void process(final OutputStream out) throws IOException {
                            final ResultSetRecordSet recordSet;
                            final RecordSchema writeSchema;
                            try {
                                recordSet = new ResultSetRecordSet(rs, readerSchema);
                                final RecordSchema resultSetSchema = recordSet.getSchema();
                                writeSchema = recordSetWriterFactory.getSchema(originalAttributes, resultSetSchema);
                            } catch (final SQLException | SchemaNotFoundException e) {
                                throw new ProcessException(e);
                            }
                            try (final RecordSetWriter resultSetWriter = recordSetWriterFactory.createWriter(getLogger(), writeSchema, out)) {
                                writeResultRef.set(resultSetWriter.write(recordSet));
                                mimeTypeRef.set(resultSetWriter.getMimeType());
                            } catch (final Exception e) {
                                throw new IOException(e);
                            }
                        }
                    });
                } finally {
                    closeQuietly(queryResult);
                }
                recordsRead = Math.max(recordsRead, queryResult.getRecordsRead());
                final WriteResult result = writeResultRef.get();
                if (result.getRecordCount() == 0 && !context.getProperty(INCLUDE_ZERO_RECORD_FLOWFILES).asBoolean()) {
                    session.remove(transformed);
                    flowFileRemoved = true;
                    transformedFlowFiles.remove(transformed);
                    getLogger().info("Transformed {} but the result contained no data so will not pass on a FlowFile", new Object[] { original });
                } else {
                    final Map<String, String> attributesToAdd = new HashMap<>();
                    if (result.getAttributes() != null) {
                        attributesToAdd.putAll(result.getAttributes());
                    }
                    attributesToAdd.put(CoreAttributes.MIME_TYPE.key(), mimeTypeRef.get());
                    attributesToAdd.put("record.count", String.valueOf(result.getRecordCount()));
                    transformed = session.putAllAttributes(transformed, attributesToAdd);
                    transformedFlowFiles.put(transformed, relationship);
                    session.adjustCounter("Records Written", result.getRecordCount(), false);
                }
            } finally {
                // Ensure that we have the FlowFile in the set in case we throw any Exception
                if (!flowFileRemoved) {
                    createdFlowFiles.add(transformed);
                }
            }
        }
        final long elapsedMillis = stopWatch.getElapsed(TimeUnit.MILLISECONDS);
        if (transformedFlowFiles.size() > 0) {
            session.getProvenanceReporter().fork(original, transformedFlowFiles.keySet(), elapsedMillis);
            for (final Map.Entry<FlowFile, Relationship> entry : transformedFlowFiles.entrySet()) {
                final FlowFile transformed = entry.getKey();
                final Relationship relationship = entry.getValue();
                session.getProvenanceReporter().route(transformed, relationship);
                session.transfer(transformed, relationship);
            }
        }
        getLogger().info("Successfully queried {} in {} millis", new Object[] { original, elapsedMillis });
        session.transfer(original, REL_ORIGINAL);
    } catch (final SQLException e) {
        getLogger().error("Unable to query {} due to {}", new Object[] { original, e.getCause() == null ? e : e.getCause() });
        session.remove(createdFlowFiles);
        session.transfer(original, REL_FAILURE);
    } catch (final Exception e) {
        getLogger().error("Unable to query {} due to {}", new Object[] { original, e });
        session.remove(createdFlowFiles);
        session.transfer(original, REL_FAILURE);
    }
    session.adjustCounter("Records Read", recordsRead, false);
}
Also used: HashMap (java.util.HashMap), SQLException (java.sql.SQLException), RecordReader (org.apache.nifi.serialization.RecordReader), OutputStream (java.io.OutputStream), RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter), RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory), ResultSet (java.sql.ResultSet), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback), RecordSchema (org.apache.nifi.serialization.record.RecordSchema), HashSet (java.util.HashSet), FlowFile (org.apache.nifi.flowfile.FlowFile), PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor), InputStream (java.io.InputStream), AtomicReference (java.util.concurrent.atomic.AtomicReference), IOException (java.io.IOException), ResultSetRecordSet (org.apache.nifi.serialization.record.ResultSetRecordSet), SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException), ProcessException (org.apache.nifi.processor.exception.ProcessException), StopWatch (org.apache.nifi.util.StopWatch), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), WriteResult (org.apache.nifi.serialization.WriteResult), Relationship (org.apache.nifi.processor.Relationship), DynamicRelationship (org.apache.nifi.annotation.behavior.DynamicRelationship), Map (java.util.Map)
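
QueryRecord treats every dynamic property as a route: the property name becomes a relationship and the property value is the SQL evaluated against the incoming records. As a hedged usage sketch (not taken from the NiFi sources), a TestRunner configuration might look like the following; the controller-service ids, the public visibility of the RECORD_READER_FACTORY and RECORD_WRITER_FACTORY constants, and the FLOWFILE table name are assumptions here.

// Illustrative configuration only; registering and enabling the "my-reader" and
// "my-writer" controller services is assumed to have happened before these calls.
final TestRunner runner = TestRunners.newTestRunner(QueryRecord.class);
runner.setProperty(QueryRecord.RECORD_READER_FACTORY, "my-reader");
runner.setProperty(QueryRecord.RECORD_WRITER_FACTORY, "my-writer");
// Dynamic property: the name ("adults") becomes a relationship, the value is the SQL to run.
runner.setProperty("adults", "SELECT * FROM FLOWFILE WHERE age >= 18");
runner.enqueue("...record content matching the configured reader...");
runner.run();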

Aggregations

RecordReader (org.apache.nifi.serialization.RecordReader): 28
InputStream (java.io.InputStream): 22
FlowFile (org.apache.nifi.flowfile.FlowFile): 22
IOException (java.io.IOException): 21
RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory): 21
ProcessException (org.apache.nifi.processor.exception.ProcessException): 18
RecordSchema (org.apache.nifi.serialization.record.RecordSchema): 17
HashMap (java.util.HashMap): 16
MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException): 16
Record (org.apache.nifi.serialization.record.Record): 15
RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory): 14
SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException): 13
RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter): 12
OutputStream (java.io.OutputStream): 10
Map (java.util.Map): 10
WriteResult (org.apache.nifi.serialization.WriteResult): 8
RecordSet (org.apache.nifi.serialization.record.RecordSet): 8
ArrayList (java.util.ArrayList): 7
BufferedInputStream (java.io.BufferedInputStream): 5
ComponentLog (org.apache.nifi.logging.ComponentLog): 5
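
Across the examples aggregated above, the shared shape is the same: obtain a RecordReaderFactory from a processor property, create a RecordReader over the FlowFile's InputStream, and call nextRecord() until it returns null. A minimal hedged sketch of that loop, using only calls that appear in the examples (the property, variable, and relationship names are illustrative):

final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
try (final InputStream in = session.read(flowFile);
     final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger())) {
    Record record;
    while ((record = reader.nextRecord()) != null) {
        // handle one record at a time; writer-based processors pair this with
        // RecordSetWriter.beginRecordSet() and write(record), as in Example 23
    }
} catch (final SchemaNotFoundException | IOException | MalformedRecordException e) {
    getLogger().error("Failed to read records", e);
    session.transfer(flowFile, REL_FAILURE);
}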