Search in sources:

Example 1 with RecordSetWriter

use of org.apache.nifi.serialization.RecordSetWriter in project nifi by apache.

the class TestConsumeAzureEventHub method setupRecordWriter.

/**
 * Installs a mocked {@link RecordSetWriterFactory} on the processor under test.
 * <p>
 * The factory hands out a single mocked {@link RecordSetWriter} and remembers the output stream it was
 * asked to write to. Each record written through the mock has its {@code "value"} field encoded as UTF-8
 * and appended to that remembered stream.
 *
 * @param throwErrorWith record value that should trigger a simulated write failure, or {@code null} to never fail
 * @throws SchemaNotFoundException declared because the stubbed factory method declares it
 * @throws IOException declared because the stubbed factory and writer methods declare it
 */
private void setupRecordWriter(String throwErrorWith) throws SchemaNotFoundException, IOException {
    final AtomicReference<OutputStream> capturedOut = new AtomicReference<>();
    final RecordSetWriter mockWriter = mock(RecordSetWriter.class);
    final RecordSetWriterFactory mockFactory = mock(RecordSetWriterFactory.class);
    processor.setWriterFactory(mockFactory);

    // Capture the stream passed as the third factory argument so the writer stub can write into it.
    when(mockFactory.createWriter(any(), any(), any())).thenAnswer(call -> {
        final OutputStream target = call.getArgumentAt(2, OutputStream.class);
        capturedOut.set(target);
        return mockWriter;
    });

    when(mockWriter.write(any(Record.class))).thenAnswer(call -> {
        final Record written = call.getArgumentAt(0, Record.class);
        final String text = (String) written.getValue("value");
        final boolean simulateFailure = throwErrorWith != null && throwErrorWith.equals(text);
        if (simulateFailure) {
            throw new IOException("Simulating record write failure.");
        }
        final OutputStream sink = capturedOut.get();
        sink.write(text.getBytes(StandardCharsets.UTF_8));
        return WriteResult.of(1, Collections.emptyMap());
    });
}
Also used : RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) OutputStream(java.io.OutputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) IOException(java.io.IOException) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter)

Example 2 with RecordSetWriter

use of org.apache.nifi.serialization.RecordSetWriter in project nifi by apache.

the class PutDruidRecord method processFlowFile.

/**
 * Parses the record(s), converts each to a Map, and sends via Tranquility to the Druid Indexing Service.
 * <p>
 * Every record is sent asynchronously; the send callback writes the record to one of three child FlowFiles
 * (dropped, failed, successful) created from the incoming FlowFile. Once every callback has reported, each
 * non-empty child is transferred to its relationship with a {@code RECORD_COUNT} attribute, empty children
 * are removed, the incoming FlowFile is removed, and the session is committed.
 * <p>
 * If the input cannot be read or parsed, the incoming FlowFile is routed to {@code REL_FAILURE} and all
 * children are discarded. If the input contains no records, the incoming FlowFile is routed to
 * {@code REL_SUCCESS} and all children are discarded.
 *
 * @param context The process context
 * @param session The process session
 * @throws ProcessException if a record writer cannot be finished or closed (the session is rolled back first)
 */
@SuppressWarnings("unchecked")
private void processFlowFile(ProcessContext context, final ProcessSession session) {
    final ComponentLog log = getLogger();
    // Get handle on Druid Tranquility session
    DruidTranquilityService tranquilityController = context.getProperty(DRUID_TRANQUILITY_SERVICE).asControllerService(DruidTranquilityService.class);
    Tranquilizer<Map<String, Object>> tranquilizer = tranquilityController.getTranquilizer();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    // Create the outgoing flow files and output streams
    FlowFile droppedFlowFile = session.create(flowFile);
    final AtomicInteger droppedFlowFileCount = new AtomicInteger(0);
    FlowFile failedFlowFile = session.create(flowFile);
    final AtomicInteger failedFlowFileCount = new AtomicInteger(0);
    FlowFile successfulFlowFile = session.create(flowFile);
    final AtomicInteger successfulFlowFileCount = new AtomicInteger(0);
    final AtomicInteger recordWriteErrors = new AtomicInteger(0);
    int recordCount = 0;
    final OutputStream droppedOutputStream = session.write(droppedFlowFile);
    final RecordSetWriter droppedRecordWriter;
    final OutputStream failedOutputStream = session.write(failedFlowFile);
    final RecordSetWriter failedRecordWriter;
    final OutputStream successfulOutputStream = session.write(successfulFlowFile);
    final RecordSetWriter successfulRecordWriter;
    try (final InputStream in = session.read(flowFile)) {
        final RecordReaderFactory recordParserFactory = context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);
        final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER_FACTORY).asControllerService(RecordSetWriterFactory.class);
        final Map<String, String> attributes = flowFile.getAttributes();
        final RecordReader reader = recordParserFactory.createRecordReader(flowFile, in, getLogger());
        final RecordSchema outSchema = writerFactory.getSchema(attributes, reader.getSchema());
        droppedRecordWriter = writerFactory.createWriter(log, outSchema, droppedOutputStream);
        droppedRecordWriter.beginRecordSet();
        failedRecordWriter = writerFactory.createWriter(log, outSchema, failedOutputStream);
        failedRecordWriter.beginRecordSet();
        successfulRecordWriter = writerFactory.createWriter(log, outSchema, successfulOutputStream);
        successfulRecordWriter.beginRecordSet();
        Record r;
        while ((r = reader.nextRecord()) != null) {
            // Effectively-final copy so the callback below can capture the current record.
            final Record record = r;
            recordCount++;
            // Convert each Record to HashMap and send to Druid
            Map<String, Object> contentMap = (Map<String, Object>) DataTypeUtils.convertRecordFieldtoObject(r, RecordFieldType.RECORD.getRecordDataType(r.getSchema()));
            log.debug("Tranquilizer Status: {}", new Object[] { tranquilizer.status().toString() });
            // Send data element to Druid asynchronously
            Future<BoxedUnit> future = tranquilizer.send(contentMap);
            log.debug("Sent Payload to Druid: {}", new Object[] { contentMap });
            // Wait for Druid to call back with status
            future.addEventListener(new FutureEventListener<Object>() {

                @Override
                public void onFailure(Throwable cause) {
                    if (cause instanceof MessageDroppedException) {
                        // This happens when event timestamp targets a Druid Indexing task that has closed (Late Arriving Data)
                        log.debug("Record Dropped due to MessageDroppedException: {}, transferring record to dropped.", new Object[] { cause.getMessage() }, cause);
                        writeRoutedRecord(droppedRecordWriter, record, droppedFlowFileCount, recordWriteErrors, log,
                                "Error transferring record to dropped, this may result in data loss.");
                    } else {
                        log.error("FlowFile Processing Failed due to: {}", new Object[] { cause.getMessage() }, cause);
                        writeRoutedRecord(failedRecordWriter, record, failedFlowFileCount, recordWriteErrors, log,
                                "Error transferring record to failure, this may result in data loss.");
                    }
                }

                @Override
                public void onSuccess(Object value) {
                    // String.valueOf tolerates a null callback value; an exception thrown here would leave
                    // the record uncounted and the completion wait below would never terminate.
                    log.debug(" FlowFile Processing Success: {}", new Object[] { String.valueOf(value) });
                    writeRoutedRecord(successfulRecordWriter, record, successfulFlowFileCount, recordWriteErrors, log,
                            "Error transferring record to success, this may result in data loss. " + "However the record was successfully processed by Druid");
                }
            });
        }
    } catch (IOException | SchemaNotFoundException | MalformedRecordException e) {
        log.error("FlowFile Processing Failed due to: {}", new Object[] { e.getMessage() }, e);
        // Reading/parsing failed: route the incoming FlowFile to failure and discard all three children.
        flowFile = session.putAttribute(flowFile, RECORD_COUNT, Integer.toString(recordCount));
        session.transfer(flowFile, REL_FAILURE);
        discardFlowFile(session, log, droppedOutputStream, droppedFlowFile, "dropped");
        discardFlowFile(session, log, failedOutputStream, failedFlowFile, "failed");
        discardFlowFile(session, log, successfulOutputStream, successfulFlowFile, "successful");
        session.commit();
        return;
    }
    if (recordCount == 0) {
        // Send original (empty) flow file to success, remove the rest
        flowFile = session.putAttribute(flowFile, RECORD_COUNT, "0");
        session.transfer(flowFile, REL_SUCCESS);
        discardFlowFile(session, log, droppedOutputStream, droppedFlowFile, "dropped");
        discardFlowFile(session, log, failedOutputStream, failedFlowFile, "failed");
        discardFlowFile(session, log, successfulOutputStream, successfulFlowFile, "successful");
    } else {
        // Wait for all the records to finish processing: every callback bumps exactly one of the four counters.
        // NOTE(review): this spins until the counters add up; a callback that fails with an unchecked
        // exception would never be counted - confirm whether a bounded wait is wanted here.
        while (recordCount != (droppedFlowFileCount.get() + failedFlowFileCount.get() + successfulFlowFileCount.get() + recordWriteErrors.get())) {
            Thread.yield();
        }
        finishRecordWriter(session, log, droppedRecordWriter, "dropped");
        if (droppedFlowFileCount.get() > 0) {
            droppedFlowFile = session.putAttribute(droppedFlowFile, RECORD_COUNT, Integer.toString(droppedFlowFileCount.get()));
            session.transfer(droppedFlowFile, REL_DROPPED);
        } else {
            session.remove(droppedFlowFile);
        }
        finishRecordWriter(session, log, failedRecordWriter, "failed");
        if (failedFlowFileCount.get() > 0) {
            failedFlowFile = session.putAttribute(failedFlowFile, RECORD_COUNT, Integer.toString(failedFlowFileCount.get()));
            session.transfer(failedFlowFile, REL_FAILURE);
        } else {
            session.remove(failedFlowFile);
        }
        finishRecordWriter(session, log, successfulRecordWriter, "successful");
        if (successfulFlowFileCount.get() > 0) {
            successfulFlowFile = session.putAttribute(successfulFlowFile, RECORD_COUNT, Integer.toString(successfulFlowFileCount.get()));
            session.transfer(successfulFlowFile, REL_SUCCESS);
            session.getProvenanceReporter().send(successfulFlowFile, tranquilityController.getTransitUri());
        } else {
            session.remove(successfulFlowFile);
        }
        session.remove(flowFile);
    }
    session.commit();
}

/**
 * Writes and flushes one record to the given writer while holding the writer's monitor, then increments
 * {@code writtenCount}. An {@link IOException} is logged with {@code errorMessage} and counted in
 * {@code writeErrorCount} instead, so each record is counted exactly once either way.
 */
private void writeRoutedRecord(final RecordSetWriter writer, final Record record, final AtomicInteger writtenCount,
        final AtomicInteger writeErrorCount, final ComponentLog log, final String errorMessage) {
    try {
        synchronized (writer) {
            writer.write(record);
            writer.flush();
            writtenCount.incrementAndGet();
        }
    } catch (final IOException ioe) {
        log.error(errorMessage, new Object[] { ioe.getMessage() }, ioe);
        writeErrorCount.incrementAndGet();
    }
}

/**
 * Closes the output stream of an unwanted child FlowFile and removes that FlowFile from the session.
 * If closing fails the error is logged and the FlowFile is left in place, as the removal is skipped.
 */
private void discardFlowFile(final ProcessSession session, final ComponentLog log, final OutputStream out,
        final FlowFile childFlowFile, final String description) {
    try {
        out.close();
        session.remove(childFlowFile);
    } catch (IOException ioe) {
        log.error("Error closing output stream for FlowFile with " + description + " records.", ioe);
    }
}

/**
 * Finishes the record set and closes the writer; on failure logs the error, rolls the session back and
 * rethrows the cause wrapped in a {@link ProcessException}.
 */
private void finishRecordWriter(final ProcessSession session, final ComponentLog log, final RecordSetWriter writer,
        final String description) {
    try {
        writer.finishRecordSet();
        writer.close();
    } catch (IOException ioe) {
        log.error("Error closing FlowFile with " + description + " records: {}", new Object[] { ioe.getMessage() }, ioe);
        session.rollback();
        throw new ProcessException(ioe);
    }
}
Also used : MessageDroppedException(com.metamx.tranquility.tranquilizer.MessageDroppedException) OutputStream(java.io.OutputStream) RecordReader(org.apache.nifi.serialization.RecordReader) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) DruidTranquilityService(org.apache.nifi.controller.api.druid.DruidTranquilityService) Record(org.apache.nifi.serialization.record.Record) BoxedUnit(scala.runtime.BoxedUnit) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) ProcessException(org.apache.nifi.processor.exception.ProcessException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) Map(java.util.Map)

Example 3 with RecordSetWriter

use of org.apache.nifi.serialization.RecordSetWriter in project nifi by apache.

the class ConsumerLease method processBundle.

/**
 * Finalizes a bundle before it is handed on.
 * <p>
 * When the bundle has a record writer, the record set is finished and the writer closed (even if finishing
 * throws). A bundle that produced zero records has its FlowFile removed from the session; otherwise the
 * writer's result attributes plus the MIME type are applied to the FlowFile. Bundles that are kept then
 * have their attributes populated.
 *
 * @param bundle the bundle to finalize
 * @return {@code false} if the bundle's FlowFile was removed because no records were written, {@code true} otherwise
 * @throws IOException if finishing the record set or closing the writer fails
 */
private boolean processBundle(final BundleTracker bundle) throws IOException {
    final RecordSetWriter recordWriter = bundle.recordWriter;
    if (recordWriter == null) {
        // Nothing to finish for bundles without a record writer.
        populateAttributes(bundle);
        return true;
    }

    final WriteResult result;
    try {
        result = recordWriter.finishRecordSet();
    } finally {
        recordWriter.close();
    }

    if (result.getRecordCount() == 0) {
        getProcessSession().remove(bundle.flowFile);
        return false;
    }

    final Map<String, String> flowFileAttributes = new HashMap<>(result.getAttributes());
    flowFileAttributes.put(CoreAttributes.MIME_TYPE.key(), recordWriter.getMimeType());
    bundle.flowFile = getProcessSession().putAllAttributes(bundle.flowFile, flowFileAttributes);

    populateAttributes(bundle);
    return true;
}
Also used : WriteResult(org.apache.nifi.serialization.WriteResult) HashMap(java.util.HashMap) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter)

Example 4 with RecordSetWriter

use of org.apache.nifi.serialization.RecordSetWriter in project nifi by apache.

the class TestPublisherLease method testRecordsSentToRecordWriterAndThenToProducer.

/**
 * Publishes a two-record record set through a {@link PublisherLease} and checks that a writer was
 * created twice, a record was written twice, and the producer's send was invoked twice.
 */
@Test
public void testRecordsSentToRecordWriterAndThenToProducer() throws IOException, SchemaNotFoundException, MalformedRecordException {
    // Input fixture: a three-field schema and two lines of sample data.
    final MockRecordParser parser = new MockRecordParser();
    parser.addSchemaField("person_id", RecordFieldType.LONG);
    parser.addSchemaField("name", RecordFieldType.STRING);
    parser.addSchemaField("age", RecordFieldType.INT);

    final byte[] sampleBytes = "101, John Doe, 48\n102, Jane Doe, 47".getBytes(StandardCharsets.UTF_8);
    final RecordReader recordReader = parser.createRecordReader(Collections.emptyMap(), new ByteArrayInputStream(sampleBytes), logger);
    final RecordSet records = recordReader.createRecordSet();
    final RecordSchema recordSchema = recordReader.getSchema();

    // Mocked writer side: every write reports a single record with no attributes.
    final RecordSetWriter mockWriter = Mockito.mock(RecordSetWriter.class);
    Mockito.when(mockWriter.write(Mockito.any(Record.class))).thenReturn(WriteResult.of(1, Collections.emptyMap()));
    final RecordSetWriterFactory mockFactory = Mockito.mock(RecordSetWriterFactory.class);
    Mockito.when(mockFactory.createWriter(eq(logger), eq(recordSchema), any())).thenReturn(mockWriter);

    // Exercise the lease.
    final String messageKeyField = "person_id";
    final String targetTopic = "unit-test";
    final FlowFile inputFlowFile = new MockFlowFile(1L);
    final PublisherLease publisherLease = new PublisherLease(producer, 1024 * 1024, 10L, logger, true, null, StandardCharsets.UTF_8);
    publisherLease.publish(inputFlowFile, records, mockFactory, recordSchema, messageKeyField, targetTopic);

    // Two input records => two writers, two writes, two sends.
    verify(mockFactory, times(2)).createWriter(eq(logger), eq(recordSchema), any());
    verify(mockWriter, times(2)).write(any(Record.class));
    verify(producer, times(2)).send(any(), any());
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) MockFlowFile(org.apache.nifi.util.MockFlowFile) RecordReader(org.apache.nifi.serialization.RecordReader) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) MockFlowFile(org.apache.nifi.util.MockFlowFile) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) ByteArrayInputStream(java.io.ByteArrayInputStream) ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord) Record(org.apache.nifi.serialization.record.Record) RecordSet(org.apache.nifi.serialization.record.RecordSet) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) MockRecordParser(org.apache.nifi.processors.kafka.pubsub.util.MockRecordParser) Test(org.junit.Test)

Example 5 with RecordSetWriter

use of org.apache.nifi.serialization.RecordSetWriter in project nifi by apache.

the class MockRecordWriter method createWriter.

/**
 * Creates a {@link RecordSetWriter} that renders record sets as comma-separated text on {@code out}.
 * <p>
 * {@code write(RecordSet)} emits the configured header line (when one is set) followed by one line per
 * record, optionally quoting every value, and fails with an {@link IOException} once more than
 * {@code failAfterN} records have been seen (when {@code failAfterN} is greater than -1).
 * {@code write(Record)} only reports one written record and emits no output.
 * All text is encoded as UTF-8 rather than the platform default charset.
 *
 * @param logger unused by this mock
 * @param schema unused by this mock; field names are taken from each record's own schema
 * @param out the stream that receives the rendered text
 * @return a writer bound to {@code out}
 */
@Override
public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out) {
    return new RecordSetWriter() {

        // Fixed encoding so test output does not depend on the JVM's default charset.
        private final java.nio.charset.Charset charset = java.nio.charset.StandardCharsets.UTF_8;
        private final byte[] newline = "\n".getBytes(charset);
        private final byte[] quote = "\"".getBytes(charset);
        private final byte[] comma = ",".getBytes(charset);

        // Number of records accepted so far; reported by finishRecordSet().
        private int totalRecordCount = 0;

        @Override
        public void flush() throws IOException {
            out.flush();
        }

        @Override
        public WriteResult write(final RecordSet rs) throws IOException {
            // A writer configured without a header simply emits no header line.
            if (header != null) {
                out.write(header.getBytes(charset));
                out.write(newline);
            }
            int recordCount = 0;
            final int numCols = rs.getSchema().getFieldCount();
            Record record = null;
            while ((record = rs.next()) != null) {
                if (++recordCount > failAfterN && failAfterN > -1) {
                    throw new IOException("Unit Test intentionally throwing IOException after " + failAfterN + " records were written");
                }
                int i = 0;
                for (final String fieldName : record.getSchema().getFieldNames()) {
                    final String val = record.getAsString(fieldName);
                    if (quoteValues) {
                        // Null values are rendered as an empty pair of quotes.
                        out.write(quote);
                        if (val != null) {
                            out.write(val.getBytes(charset));
                        }
                        out.write(quote);
                    } else if (val != null) {
                        out.write(val.getBytes(charset));
                    }
                    // Separator between columns, but not after the last one.
                    if (i++ < numCols - 1) {
                        out.write(comma);
                    }
                }
                out.write(newline);
            }
            totalRecordCount += recordCount;
            return WriteResult.of(recordCount, Collections.emptyMap());
        }

        @Override
        public String getMimeType() {
            return "text/plain";
        }

        @Override
        public WriteResult write(Record record) throws IOException {
            // NOTE(review): single-record writes are counted but produce no output - confirm this is intended.
            totalRecordCount++;
            return WriteResult.of(1, Collections.emptyMap());
        }

        @Override
        public void close() throws IOException {
        }

        @Override
        public void beginRecordSet() throws IOException {
        }

        @Override
        public WriteResult finishRecordSet() throws IOException {
            // Return a real result instead of null so callers can read the record count safely.
            return WriteResult.of(totalRecordCount, Collections.emptyMap());
        }
    };
}
Also used : Record(org.apache.nifi.serialization.record.Record) RecordSet(org.apache.nifi.serialization.record.RecordSet) IOException(java.io.IOException) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter)

Aggregations

RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter): 21, Record (org.apache.nifi.serialization.record.Record): 17, IOException (java.io.IOException): 16, RecordSchema (org.apache.nifi.serialization.record.RecordSchema): 16, OutputStream (java.io.OutputStream): 14, FlowFile (org.apache.nifi.flowfile.FlowFile): 14, RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory): 14, HashMap (java.util.HashMap): 13, InputStream (java.io.InputStream): 11, ProcessException (org.apache.nifi.processor.exception.ProcessException): 11, RecordReader (org.apache.nifi.serialization.RecordReader): 11, WriteResult (org.apache.nifi.serialization.WriteResult): 10, RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory): 8, Map (java.util.Map): 7, SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException): 7, ArrayList (java.util.ArrayList): 6, ByteArrayInputStream (java.io.ByteArrayInputStream): 5, HashSet (java.util.HashSet): 4, MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException): 4, RecordSet (org.apache.nifi.serialization.record.RecordSet): 4