Search in sources :

Example 1 with RecordSetWriterFactory

use of org.apache.nifi.serialization.RecordSetWriterFactory in project nifi by apache.

the class TestConsumeAzureEventHub method setupRecordWriter.

/**
 * Installs a mocked {@link RecordSetWriterFactory} on the processor under test.
 * <p>
 * The mocked factory remembers the {@link OutputStream} it was asked to write to and always hands
 * back the same mocked {@link RecordSetWriter}. That writer copies the record's {@code "value"}
 * field (as UTF-8 bytes) to the remembered stream, unless the value equals {@code throwErrorWith},
 * in which case it throws an {@link IOException} to simulate a write failure.
 *
 * @param throwErrorWith record value that should trigger a simulated write failure, or
 *                       {@code null} to never fail
 * @throws SchemaNotFoundException declared by the stubbed {@code createWriter} call
 * @throws IOException declared by the stubbed {@code createWriter} / {@code write} calls
 */
private void setupRecordWriter(String throwErrorWith) throws SchemaNotFoundException, IOException {
    final RecordSetWriterFactory mockFactory = mock(RecordSetWriterFactory.class);
    final RecordSetWriter mockWriter = mock(RecordSetWriter.class);
    processor.setWriterFactory(mockFactory);

    // Holds the stream most recently passed to createWriter so that write() can reach it.
    final AtomicReference<OutputStream> capturedStream = new AtomicReference<>();

    when(mockFactory.createWriter(any(), any(), any())).thenAnswer(invocation -> {
        final OutputStream target = invocation.getArgumentAt(2, OutputStream.class);
        capturedStream.set(target);
        return mockWriter;
    });

    when(mockWriter.write(any(Record.class))).thenAnswer(invocation -> {
        final Record incoming = invocation.getArgumentAt(0, Record.class);
        final String text = (String) incoming.getValue("value");
        final boolean simulateFailure = throwErrorWith != null && throwErrorWith.equals(text);
        if (simulateFailure) {
            throw new IOException("Simulating record write failure.");
        }
        capturedStream.get().write(text.getBytes(StandardCharsets.UTF_8));
        return WriteResult.of(1, Collections.emptyMap());
    });
}
Also used : RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) OutputStream(java.io.OutputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) IOException(java.io.IOException) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter)

Example 2 with RecordSetWriterFactory

use of org.apache.nifi.serialization.RecordSetWriterFactory in project nifi by apache.

the class PutDruidRecord method processFlowFile.

/**
 * Parses the record(s) of the next queued FlowFile, converts each to a Map, and sends it via
 * Tranquility to the Druid Indexing Service.
 * <p>
 * Three child FlowFiles (dropped / failed / successful) are created up front. Each record is
 * written to one of them from the send callback, depending on the outcome reported for that
 * record. Once every record has been accounted for, non-empty children are transferred to their
 * relationship with a record-count attribute, empty children are removed, and the original
 * FlowFile is removed. If the input cannot be read or parsed, the original FlowFile is routed to
 * failure and the children are discarded. An input with no records is routed to success unchanged.
 *
 * @param context The process context
 * @param session The process session
 * @throws ProcessException if one of the record writers cannot be finished/closed (the session is
 *                          rolled back first)
 */
@SuppressWarnings("unchecked")
private void processFlowFile(ProcessContext context, final ProcessSession session) {
    final ComponentLog log = getLogger();
    // Get handle on Druid Tranquility session
    DruidTranquilityService tranquilityController = context.getProperty(DRUID_TRANQUILITY_SERVICE).asControllerService(DruidTranquilityService.class);
    Tranquilizer<Map<String, Object>> tranquilizer = tranquilityController.getTranquilizer();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    // Create the outgoing flow files and output streams
    FlowFile droppedFlowFile = session.create(flowFile);
    final AtomicInteger droppedFlowFileCount = new AtomicInteger(0);
    FlowFile failedFlowFile = session.create(flowFile);
    final AtomicInteger failedFlowFileCount = new AtomicInteger(0);
    FlowFile successfulFlowFile = session.create(flowFile);
    final AtomicInteger successfulFlowFileCount = new AtomicInteger(0);
    // Counts records whose callback could not write them to any child FlowFile; needed so the
    // completion wait below still terminates.
    final AtomicInteger recordWriteErrors = new AtomicInteger(0);
    int recordCount = 0;
    final OutputStream droppedOutputStream = session.write(droppedFlowFile);
    final RecordSetWriter droppedRecordWriter;
    final OutputStream failedOutputStream = session.write(failedFlowFile);
    final RecordSetWriter failedRecordWriter;
    final OutputStream successfulOutputStream = session.write(successfulFlowFile);
    final RecordSetWriter successfulRecordWriter;
    try (final InputStream in = session.read(flowFile)) {
        final RecordReaderFactory recordParserFactory = context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);
        final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER_FACTORY).asControllerService(RecordSetWriterFactory.class);
        final Map<String, String> attributes = flowFile.getAttributes();
        final RecordReader reader = recordParserFactory.createRecordReader(flowFile, in, getLogger());
        final RecordSchema outSchema = writerFactory.getSchema(attributes, reader.getSchema());
        droppedRecordWriter = writerFactory.createWriter(log, outSchema, droppedOutputStream);
        droppedRecordWriter.beginRecordSet();
        failedRecordWriter = writerFactory.createWriter(log, outSchema, failedOutputStream);
        failedRecordWriter.beginRecordSet();
        successfulRecordWriter = writerFactory.createWriter(log, outSchema, successfulOutputStream);
        successfulRecordWriter.beginRecordSet();
        Record r;
        while ((r = reader.nextRecord()) != null) {
            // Final copy so the callback below can capture the record.
            final Record record = r;
            recordCount++;
            // Convert each Record to HashMap and send to Druid
            Map<String, Object> contentMap = (Map<String, Object>) DataTypeUtils.convertRecordFieldtoObject(r, RecordFieldType.RECORD.getRecordDataType(r.getSchema()));
            log.debug("Tranquilizer Status: {}", new Object[] { tranquilizer.status().toString() });
            // Send data element to Druid asynchronously
            Future<BoxedUnit> future = tranquilizer.send(contentMap);
            log.debug("Sent Payload to Druid: {}", new Object[] { contentMap });
            // Wait for Druid to call back with status. Each callback writes the record to exactly one
            // child writer (guarded by that writer's monitor) and bumps exactly one counter.
            future.addEventListener(new FutureEventListener<Object>() {

                @Override
                public void onFailure(Throwable cause) {
                    if (cause instanceof MessageDroppedException) {
                        // This happens when event timestamp targets a Druid Indexing task that has closed (Late Arriving Data)
                        log.debug("Record Dropped due to MessageDroppedException: {}, transferring record to dropped.", new Object[] { cause.getMessage() }, cause);
                        try {
                            synchronized (droppedRecordWriter) {
                                droppedRecordWriter.write(record);
                                droppedRecordWriter.flush();
                                droppedFlowFileCount.incrementAndGet();
                            }
                        } catch (final IOException ioe) {
                            log.error("Error transferring record to dropped, this may result in data loss.", new Object[] { ioe.getMessage() }, ioe);
                            recordWriteErrors.incrementAndGet();
                        }
                    } else {
                        log.error("FlowFile Processing Failed due to: {}", new Object[] { cause.getMessage() }, cause);
                        try {
                            synchronized (failedRecordWriter) {
                                failedRecordWriter.write(record);
                                failedRecordWriter.flush();
                                failedFlowFileCount.incrementAndGet();
                            }
                        } catch (final IOException ioe) {
                            log.error("Error transferring record to failure, this may result in data loss.", new Object[] { ioe.getMessage() }, ioe);
                            recordWriteErrors.incrementAndGet();
                        }
                    }
                }

                @Override
                public void onSuccess(Object value) {
                    log.debug(" FlowFile Processing Success: {}", new Object[] { value.toString() });
                    try {
                        synchronized (successfulRecordWriter) {
                            successfulRecordWriter.write(record);
                            successfulRecordWriter.flush();
                            successfulFlowFileCount.incrementAndGet();
                        }
                    } catch (final IOException ioe) {
                        log.error("Error transferring record to success, this may result in data loss. " + "However the record was successfully processed by Druid", new Object[] { ioe.getMessage() }, ioe);
                        recordWriteErrors.incrementAndGet();
                    }
                }
            });
        }
    } catch (IOException | SchemaNotFoundException | MalformedRecordException e) {
        log.error("FlowFile Processing Failed due to: {}", new Object[] { e.getMessage() }, e);
        // The incoming FlowFile could not be read or parsed: route the original to failure
        // (tagged with the number of records read so far) and discard the three child FlowFiles.
        flowFile = session.putAttribute(flowFile, RECORD_COUNT, Integer.toString(recordCount));
        session.transfer(flowFile, REL_FAILURE);
        closeAndRemoveUnused(session, log, droppedOutputStream, droppedFlowFile, "dropped");
        closeAndRemoveUnused(session, log, failedOutputStream, failedFlowFile, "failed");
        closeAndRemoveUnused(session, log, successfulOutputStream, successfulFlowFile, "successful");
        session.commit();
        return;
    }
    if (recordCount == 0) {
        // Send original (empty) flow file to success, remove the rest
        flowFile = session.putAttribute(flowFile, RECORD_COUNT, "0");
        session.transfer(flowFile, REL_SUCCESS);
        closeAndRemoveUnused(session, log, droppedOutputStream, droppedFlowFile, "dropped");
        closeAndRemoveUnused(session, log, failedOutputStream, failedFlowFile, "failed");
        closeAndRemoveUnused(session, log, successfulOutputStream, successfulFlowFile, "successful");
    } else {
        // Wait for all the records to finish processing: every callback increments exactly one of
        // these four counters, so their sum reaches recordCount once all callbacks have run.
        while (recordCount != (droppedFlowFileCount.get() + failedFlowFileCount.get() + successfulFlowFileCount.get() + recordWriteErrors.get())) {
            Thread.yield();
        }
        finishRecordWriter(session, log, droppedRecordWriter, "dropped");
        if (droppedFlowFileCount.get() > 0) {
            droppedFlowFile = session.putAttribute(droppedFlowFile, RECORD_COUNT, Integer.toString(droppedFlowFileCount.get()));
            session.transfer(droppedFlowFile, REL_DROPPED);
        } else {
            session.remove(droppedFlowFile);
        }
        finishRecordWriter(session, log, failedRecordWriter, "failed");
        if (failedFlowFileCount.get() > 0) {
            failedFlowFile = session.putAttribute(failedFlowFile, RECORD_COUNT, Integer.toString(failedFlowFileCount.get()));
            session.transfer(failedFlowFile, REL_FAILURE);
        } else {
            session.remove(failedFlowFile);
        }
        finishRecordWriter(session, log, successfulRecordWriter, "successful");
        if (successfulFlowFileCount.get() > 0) {
            successfulFlowFile = session.putAttribute(successfulFlowFile, RECORD_COUNT, Integer.toString(successfulFlowFileCount.get()));
            session.transfer(successfulFlowFile, REL_SUCCESS);
            session.getProvenanceReporter().send(successfulFlowFile, tranquilityController.getTransitUri());
        } else {
            session.remove(successfulFlowFile);
        }
        // All records now live in the child FlowFiles; the original is no longer needed.
        session.remove(flowFile);
    }
    session.commit();
}

/**
 * Closes the output stream of a child FlowFile that will not be transferred and then removes that
 * FlowFile from the session. If closing the stream fails, the error is logged and the FlowFile is
 * not removed.
 *
 * @param session    the session owning {@code flowFile}
 * @param log        logger used to report a failed close
 * @param out        the open output stream of {@code flowFile}
 * @param flowFile   the child FlowFile to discard
 * @param recordKind word used in the log message ("dropped", "failed" or "successful")
 */
private static void closeAndRemoveUnused(final ProcessSession session, final ComponentLog log, final OutputStream out, final FlowFile flowFile, final String recordKind) {
    try {
        out.close();
        session.remove(flowFile);
    } catch (IOException ioe) {
        log.error("Error closing output stream for FlowFile with " + recordKind + " records.", ioe);
    }
}

/**
 * Finishes the record set of a child writer and closes it. On failure the error is logged, the
 * session is rolled back and the failure is rethrown as a {@link ProcessException}.
 *
 * @param session    the session to roll back on failure
 * @param log        logger used to report the failure
 * @param writer     the record writer to finish and close
 * @param recordKind word used in the log message ("dropped", "failed" or "successful")
 * @throws ProcessException wrapping the {@link IOException} raised while finishing or closing
 */
private static void finishRecordWriter(final ProcessSession session, final ComponentLog log, final RecordSetWriter writer, final String recordKind) {
    try {
        writer.finishRecordSet();
        writer.close();
    } catch (IOException ioe) {
        log.error("Error closing FlowFile with " + recordKind + " records: {}", new Object[] { ioe.getMessage() }, ioe);
        session.rollback();
        throw new ProcessException(ioe);
    }
}
Also used : MessageDroppedException(com.metamx.tranquility.tranquilizer.MessageDroppedException) OutputStream(java.io.OutputStream) RecordReader(org.apache.nifi.serialization.RecordReader) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) DruidTranquilityService(org.apache.nifi.controller.api.druid.DruidTranquilityService) Record(org.apache.nifi.serialization.record.Record) BoxedUnit(scala.runtime.BoxedUnit) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) ProcessException(org.apache.nifi.processor.exception.ProcessException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) Map(java.util.Map)

Example 3 with RecordSetWriterFactory

use of org.apache.nifi.serialization.RecordSetWriterFactory in project nifi by apache.

the class PublishKafkaRecord_1_0 method onTrigger.

/**
 * Pulls a batch of FlowFiles, publishes the records of each one to Kafka through a single
 * publisher lease, and routes the whole batch to success or failure based on the lease's result.
 * <p>
 * If the processor is no longer scheduled while the batch is being sent, FlowFiles not yet sent
 * are transferred back (non-transactional mode) or the session and lease are both rolled back
 * (transactional mode).
 *
 * @param context the process context supplying property values
 * @param session the process session supplying and receiving FlowFiles
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final List<FlowFile> flowFiles = session.get(FlowFileFilters.newSizeBasedFilter(1, DataUnit.MB, 500));
    if (flowFiles.isEmpty()) {
        return;
    }

    final PublisherPool publisherPool = getPublisherPool(context);
    if (publisherPool == null) {
        context.yield();
        return;
    }

    final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue();
    final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final boolean useTransactions = context.getProperty(USE_TRANSACTIONS).asBoolean();

    final long startNanos = System.nanoTime();
    try (final PublisherLease lease = publisherPool.obtainPublisher()) {
        if (useTransactions) {
            lease.beginTransaction();
        }

        // Hand every FlowFile of the batch to the lease; sending is asynchronous.
        for (final Iterator<FlowFile> pending = flowFiles.iterator(); pending.hasNext(); ) {
            final FlowFile flowFile = pending.next();

            if (!isScheduled()) {
                // Processor was stopped mid-batch: do not send anything further.
                if (useTransactions) {
                    session.rollback();
                    lease.rollback();
                    return;
                }
                // Put the FlowFile back and drop it from the batch so it is not routed below.
                session.transfer(flowFile);
                pending.remove();
                continue;
            }

            final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue();
            final String messageKeyField = context.getProperty(MESSAGE_KEY_FIELD).evaluateAttributeExpressions(flowFile).getValue();

            try {
                session.read(flowFile, new InputStreamCallback() {

                    @Override
                    public void process(final InputStream content) throws IOException {
                        try (final InputStream buffered = new BufferedInputStream(content)) {
                            final RecordReader recordReader = readerFactory.createRecordReader(flowFile, buffered, getLogger());
                            final RecordSet records = recordReader.createRecordSet();
                            final RecordSchema writeSchema = writerFactory.getSchema(flowFile.getAttributes(), records.getSchema());
                            lease.publish(flowFile, records, writerFactory, writeSchema, messageKeyField, topic);
                        } catch (final SchemaNotFoundException | MalformedRecordException e) {
                            throw new ProcessException(e);
                        }
                    }
                });
            } catch (final Exception e) {
                // Record the failure against this FlowFile on the lease and move on to the next one.
                lease.fail(flowFile, e);
            }
        }

        // Complete the send and find out how it went.
        final PublishResult result = lease.complete();
        if (result.isFailure()) {
            getLogger().info("Failed to send FlowFile to kafka; transferring to failure");
            session.transfer(flowFiles, REL_FAILURE);
            return;
        }

        // Everything was sent: annotate, report provenance for, and transfer each FlowFile.
        final long elapsedNanos = System.nanoTime() - startNanos;
        final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(elapsedNanos);
        for (final FlowFile sent : flowFiles) {
            final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(sent).getValue();
            final int msgCount = result.getSuccessfulMessageCount(sent);
            final FlowFile annotated = session.putAttribute(sent, MSG_COUNT, String.valueOf(msgCount));
            session.adjustCounter("Messages Sent", msgCount, true);
            final String transitUri = KafkaProcessorUtils.buildTransitURI(securityProtocol, bootstrapServers, topic);
            session.getProvenanceReporter().send(annotated, transitUri, "Sent " + msgCount + " messages", transmissionMillis);
            session.transfer(annotated, REL_SUCCESS);
        }
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) BufferedInputStream(java.io.BufferedInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.serialization.RecordReader) IOException(java.io.IOException) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) ProcessException(org.apache.nifi.processor.exception.ProcessException) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) IOException(java.io.IOException) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) ProcessException(org.apache.nifi.processor.exception.ProcessException) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) BufferedInputStream(java.io.BufferedInputStream) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) RecordSet(org.apache.nifi.serialization.record.RecordSet) RecordSchema(org.apache.nifi.serialization.record.RecordSchema)

Example 4 with RecordSetWriterFactory

use of org.apache.nifi.serialization.RecordSetWriterFactory in project nifi by apache.

the class TestConsumeKafkaRecord_1_0 method setup.

/**
 * Prepares the test runner: a {@code ConsumeKafkaRecord_1_0} whose consumer pool is replaced by a
 * mock, plus an enabled mock record reader (schema: name STRING, age INT) and an enabled mock
 * record writer, both wired to the processor's reader/writer properties.
 *
 * @throws InitializationException declared for the controller-service registration calls
 */
@Before
public void setup() throws InitializationException {
    mockLease = mock(ConsumerLease.class);
    mockConsumerPool = mock(ConsumerPool.class);

    // Subclass the processor so it hands out the mocked pool instead of building a real one.
    final ConsumeKafkaRecord_1_0 processorUnderTest = new ConsumeKafkaRecord_1_0() {

        @Override
        protected ConsumerPool createConsumerPool(final ProcessContext processContext, final ComponentLog logger) {
            return mockConsumerPool;
        }
    };
    runner = TestRunners.newTestRunner(processorUnderTest);
    runner.setProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS, "okeydokey:1234");

    // Record reader service
    final String readerServiceId = "record-reader";
    final MockRecordParser recordParser = new MockRecordParser();
    recordParser.addSchemaField("name", RecordFieldType.STRING);
    recordParser.addSchemaField("age", RecordFieldType.INT);
    runner.addControllerService(readerServiceId, recordParser);
    runner.enableControllerService(recordParser);

    // Record writer service
    final String writerServiceId = "record-writer";
    final RecordSetWriterFactory recordWriter = new MockRecordWriter("name, age");
    runner.addControllerService(writerServiceId, recordWriter);
    runner.enableControllerService(recordWriter);

    // Point the processor at both services.
    runner.setProperty(ConsumeKafkaRecord_1_0.RECORD_READER, readerServiceId);
    runner.setProperty(ConsumeKafkaRecord_1_0.RECORD_WRITER, writerServiceId);
}
Also used : RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) MockRecordWriter(org.apache.nifi.processors.kafka.pubsub.util.MockRecordWriter) ComponentLog(org.apache.nifi.logging.ComponentLog) ProcessContext(org.apache.nifi.processor.ProcessContext) MockRecordParser(org.apache.nifi.processors.kafka.pubsub.util.MockRecordParser) Before(org.junit.Before)

Example 5 with RecordSetWriterFactory

use of org.apache.nifi.serialization.RecordSetWriterFactory in project nifi by apache.

the class TestPublishKafkaRecord_1_0 method setup.

/**
 * Prepares the test runner: a {@code PublishKafkaRecord_1_0} whose publisher pool is a mock that
 * hands out a mocked lease (with the real {@code publish} implementation enabled), plus an enabled
 * mock record reader (schema: name STRING, age INT) and an enabled mock record writer wired to the
 * processor's reader/writer properties.
 *
 * @throws InitializationException declared for the controller-service registration calls
 * @throws IOException declared by the stubbed {@code publish} call
 */
@Before
public void setup() throws InitializationException, IOException {
    mockPool = mock(PublisherPool.class);
    mockLease = mock(PublisherLease.class);

    // Let publish(...) run its real code on the mocked lease; the pool always returns that lease.
    Mockito.doCallRealMethod().when(mockLease).publish(any(FlowFile.class), any(RecordSet.class), any(RecordSetWriterFactory.class), any(RecordSchema.class), any(String.class), any(String.class));
    when(mockPool.obtainPublisher()).thenReturn(mockLease);

    // Subclass the processor so it uses the mocked pool instead of building a real one.
    final PublishKafkaRecord_1_0 processorUnderTest = new PublishKafkaRecord_1_0() {

        @Override
        protected PublisherPool createPublisherPool(final ProcessContext processContext) {
            return mockPool;
        }
    };
    runner = TestRunners.newTestRunner(processorUnderTest);
    runner.setProperty(PublishKafkaRecord_1_0.TOPIC, TOPIC_NAME);

    // Record reader service
    final String readerServiceId = "record-reader";
    final MockRecordParser recordParser = new MockRecordParser();
    recordParser.addSchemaField("name", RecordFieldType.STRING);
    recordParser.addSchemaField("age", RecordFieldType.INT);
    runner.addControllerService(readerServiceId, recordParser);
    runner.enableControllerService(recordParser);

    // Record writer service
    final String writerServiceId = "record-writer";
    final RecordSetWriterFactory recordWriter = new MockRecordWriter("name, age");
    runner.addControllerService(writerServiceId, recordWriter);
    runner.enableControllerService(recordWriter);

    // Point the processor at both services and pick the delivery guarantee.
    runner.setProperty(PublishKafkaRecord_1_0.RECORD_READER, readerServiceId);
    runner.setProperty(PublishKafkaRecord_1_0.RECORD_WRITER, writerServiceId);
    runner.setProperty(PublishKafka_1_0.DELIVERY_GUARANTEE, PublishKafka_1_0.DELIVERY_REPLICATED);
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) MockFlowFile(org.apache.nifi.util.MockFlowFile) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) RecordSet(org.apache.nifi.serialization.record.RecordSet) MockRecordWriter(org.apache.nifi.processors.kafka.pubsub.util.MockRecordWriter) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) ProcessContext(org.apache.nifi.processor.ProcessContext) MockRecordParser(org.apache.nifi.processors.kafka.pubsub.util.MockRecordParser) Before(org.junit.Before)

Aggregations

RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory)29 FlowFile (org.apache.nifi.flowfile.FlowFile)19 RecordSchema (org.apache.nifi.serialization.record.RecordSchema)19 IOException (java.io.IOException)17 RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory)15 RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter)14 HashMap (java.util.HashMap)13 ProcessException (org.apache.nifi.processor.exception.ProcessException)13 RecordReader (org.apache.nifi.serialization.RecordReader)13 OutputStream (java.io.OutputStream)12 Record (org.apache.nifi.serialization.record.Record)12 InputStream (java.io.InputStream)11 SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException)10 RecordSet (org.apache.nifi.serialization.record.RecordSet)9 ArrayList (java.util.ArrayList)7 Map (java.util.Map)7 ComponentLog (org.apache.nifi.logging.ComponentLog)7 ProcessContext (org.apache.nifi.processor.ProcessContext)7 MockRecordParser (org.apache.nifi.processors.kafka.pubsub.util.MockRecordParser)7 WriteResult (org.apache.nifi.serialization.WriteResult)7