Example 1 with Record

Use of org.apache.nifi.serialization.record.Record in project nifi by apache.

From the class TestConsumeAzureEventHub, the method setupRecordWriter:

private void setupRecordWriter(String throwErrorWith) throws SchemaNotFoundException, IOException {
    final RecordSetWriterFactory writerFactory = mock(RecordSetWriterFactory.class);
    processor.setWriterFactory(writerFactory);
    final RecordSetWriter writer = mock(RecordSetWriter.class);
    final AtomicReference<OutputStream> outRef = new AtomicReference<>();
    // Capture the OutputStream handed to createWriter so the stubbed writer can write into it
    when(writerFactory.createWriter(any(), any(), any())).thenAnswer(invocation -> {
        outRef.set(invocation.getArgumentAt(2, OutputStream.class));
        return writer;
    });
    // Stubbed write: throw an IOException when the record's "value" field equals throwErrorWith
    when(writer.write(any(Record.class))).thenAnswer(invocation -> {
        final String value = (String) invocation.getArgumentAt(0, Record.class).getValue("value");
        if (throwErrorWith != null && throwErrorWith.equals(value)) {
            throw new IOException("Simulating record write failure.");
        }
        outRef.get().write(value.getBytes(StandardCharsets.UTF_8));
        return WriteResult.of(1, Collections.emptyMap());
    });
}
Also used : RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) OutputStream(java.io.OutputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) IOException(java.io.IOException) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter)
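
For context, the Record handed to the stubbed writer above can be built with NiFi's MapRecord and SimpleRecordSchema. A minimal, hypothetical sketch (the helper name is an assumption; the field name "value" matches the lookup in the stub):

private Record buildTestRecord(final String payload) {
    // One-field schema: a STRING field named "value", which the stub reads via record.getValue("value")
    final RecordSchema schema = new SimpleRecordSchema(
            Collections.singletonList(new RecordField("value", RecordFieldType.STRING.getDataType())));
    final Map<String, Object> values = new HashMap<>();
    values.put("value", payload);
    return new MapRecord(schema, values);
}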

Example 2 with Record

Use of org.apache.nifi.serialization.record.Record in project nifi by apache.

From the class PutDruidRecord, the method processFlowFile:

/**
 * Parses the record(s), converts each to a Map, and sends via Tranquility to the Druid Indexing Service
 *
 * @param context The process context
 * @param session The process session
 */
@SuppressWarnings("unchecked")
private void processFlowFile(ProcessContext context, final ProcessSession session) {
    final ComponentLog log = getLogger();
    // Get handle on Druid Tranquility session
    DruidTranquilityService tranquilityController = context.getProperty(DRUID_TRANQUILITY_SERVICE).asControllerService(DruidTranquilityService.class);
    Tranquilizer<Map<String, Object>> tranquilizer = tranquilityController.getTranquilizer();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    // Create the outgoing flow files and output streams
    FlowFile droppedFlowFile = session.create(flowFile);
    final AtomicInteger droppedFlowFileCount = new AtomicInteger(0);
    FlowFile failedFlowFile = session.create(flowFile);
    final AtomicInteger failedFlowFileCount = new AtomicInteger(0);
    FlowFile successfulFlowFile = session.create(flowFile);
    final AtomicInteger successfulFlowFileCount = new AtomicInteger(0);
    final AtomicInteger recordWriteErrors = new AtomicInteger(0);
    int recordCount = 0;
    final OutputStream droppedOutputStream = session.write(droppedFlowFile);
    final RecordSetWriter droppedRecordWriter;
    final OutputStream failedOutputStream = session.write(failedFlowFile);
    final RecordSetWriter failedRecordWriter;
    final OutputStream successfulOutputStream = session.write(successfulFlowFile);
    final RecordSetWriter successfulRecordWriter;
    try (final InputStream in = session.read(flowFile)) {
        final RecordReaderFactory recordParserFactory = context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);
        final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER_FACTORY).asControllerService(RecordSetWriterFactory.class);
        final Map<String, String> attributes = flowFile.getAttributes();
        final RecordReader reader = recordParserFactory.createRecordReader(flowFile, in, getLogger());
        final RecordSchema outSchema = writerFactory.getSchema(attributes, reader.getSchema());
        droppedRecordWriter = writerFactory.createWriter(log, outSchema, droppedOutputStream);
        droppedRecordWriter.beginRecordSet();
        failedRecordWriter = writerFactory.createWriter(log, outSchema, failedOutputStream);
        failedRecordWriter.beginRecordSet();
        successfulRecordWriter = writerFactory.createWriter(log, outSchema, successfulOutputStream);
        successfulRecordWriter.beginRecordSet();
        Record r;
        while ((r = reader.nextRecord()) != null) {
            final Record record = r;
            recordCount++;
            // Convert each Record to HashMap and send to Druid
            Map<String, Object> contentMap = (Map<String, Object>) DataTypeUtils.convertRecordFieldtoObject(r, RecordFieldType.RECORD.getRecordDataType(r.getSchema()));
            log.debug("Tranquilizer Status: {}", new Object[] { tranquilizer.status().toString() });
            // Send data element to Druid asynchronously
            Future<BoxedUnit> future = tranquilizer.send(contentMap);
            log.debug("Sent Payload to Druid: {}", new Object[] { contentMap });
            // Wait for Druid to call back with status
            future.addEventListener(new FutureEventListener<Object>() {

                @Override
                public void onFailure(Throwable cause) {
                    if (cause instanceof MessageDroppedException) {
                        // This happens when event timestamp targets a Druid Indexing task that has closed (Late Arriving Data)
                        log.debug("Record Dropped due to MessageDroppedException: {}, transferring record to dropped.", new Object[] { cause.getMessage() }, cause);
                        try {
                            synchronized (droppedRecordWriter) {
                                droppedRecordWriter.write(record);
                                droppedRecordWriter.flush();
                                droppedFlowFileCount.incrementAndGet();
                            }
                        } catch (final IOException ioe) {
                            log.error("Error transferring record to dropped, this may result in data loss.", new Object[] { ioe.getMessage() }, ioe);
                            recordWriteErrors.incrementAndGet();
                        }
                    } else {
                        log.error("FlowFile Processing Failed due to: {}", new Object[] { cause.getMessage() }, cause);
                        try {
                            synchronized (failedRecordWriter) {
                                failedRecordWriter.write(record);
                                failedRecordWriter.flush();
                                failedFlowFileCount.incrementAndGet();
                            }
                        } catch (final IOException ioe) {
                            log.error("Error transferring record to failure, this may result in data loss.", new Object[] { ioe.getMessage() }, ioe);
                            recordWriteErrors.incrementAndGet();
                        }
                    }
                }

                @Override
                public void onSuccess(Object value) {
                    log.debug(" FlowFile Processing Success: {}", new Object[] { value.toString() });
                    try {
                        synchronized (successfulRecordWriter) {
                            successfulRecordWriter.write(record);
                            successfulRecordWriter.flush();
                            successfulFlowFileCount.incrementAndGet();
                        }
                    } catch (final IOException ioe) {
                        log.error("Error transferring record to success, this may result in data loss. " + "However the record was successfully processed by Druid", new Object[] { ioe.getMessage() }, ioe);
                        recordWriteErrors.incrementAndGet();
                    }
                }
            });
        }
    } catch (IOException | SchemaNotFoundException | MalformedRecordException e) {
        log.error("FlowFile Processing Failed due to: {}", new Object[] { e.getMessage() }, e);
        // Records could not be read or written; route the incoming FlowFile to failure and discard the partial outputs
        flowFile = session.putAttribute(flowFile, RECORD_COUNT, Integer.toString(recordCount));
        session.transfer(flowFile, REL_FAILURE);
        try {
            droppedOutputStream.close();
            session.remove(droppedFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with dropped records.", ioe);
        }
        try {
            failedOutputStream.close();
            session.remove(failedFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with failed records.", ioe);
        }
        try {
            successfulOutputStream.close();
            session.remove(successfulFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with successful records.", ioe);
        }
        session.commit();
        return;
    }
    if (recordCount == 0) {
        // Send original (empty) flow file to success, remove the rest
        flowFile = session.putAttribute(flowFile, RECORD_COUNT, "0");
        session.transfer(flowFile, REL_SUCCESS);
        try {
            droppedOutputStream.close();
            session.remove(droppedFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with dropped records.", ioe);
        }
        try {
            failedOutputStream.close();
            session.remove(failedFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with failed records.", ioe);
        }
        try {
            successfulOutputStream.close();
            session.remove(successfulFlowFile);
        } catch (IOException ioe) {
            log.error("Error closing output stream for FlowFile with successful records.", ioe);
        }
    } else {
        // Wait for all the records to finish processing
        while (recordCount != (droppedFlowFileCount.get() + failedFlowFileCount.get() + successfulFlowFileCount.get() + recordWriteErrors.get())) {
            Thread.yield();
        }
        try {
            droppedRecordWriter.finishRecordSet();
            droppedRecordWriter.close();
        } catch (IOException ioe) {
            log.error("Error closing FlowFile with dropped records: {}", new Object[] { ioe.getMessage() }, ioe);
            session.rollback();
            throw new ProcessException(ioe);
        }
        if (droppedFlowFileCount.get() > 0) {
            droppedFlowFile = session.putAttribute(droppedFlowFile, RECORD_COUNT, Integer.toString(droppedFlowFileCount.get()));
            session.transfer(droppedFlowFile, REL_DROPPED);
        } else {
            session.remove(droppedFlowFile);
        }
        try {
            failedRecordWriter.finishRecordSet();
            failedRecordWriter.close();
        } catch (IOException ioe) {
            log.error("Error closing FlowFile with failed records: {}", new Object[] { ioe.getMessage() }, ioe);
            session.rollback();
            throw new ProcessException(ioe);
        }
        if (failedFlowFileCount.get() > 0) {
            failedFlowFile = session.putAttribute(failedFlowFile, RECORD_COUNT, Integer.toString(failedFlowFileCount.get()));
            session.transfer(failedFlowFile, REL_FAILURE);
        } else {
            session.remove(failedFlowFile);
        }
        try {
            successfulRecordWriter.finishRecordSet();
            successfulRecordWriter.close();
        } catch (IOException ioe) {
            log.error("Error closing FlowFile with successful records: {}", new Object[] { ioe.getMessage() }, ioe);
            session.rollback();
            throw new ProcessException(ioe);
        }
        if (successfulFlowFileCount.get() > 0) {
            successfulFlowFile = session.putAttribute(successfulFlowFile, RECORD_COUNT, Integer.toString(successfulFlowFileCount.get()));
            session.transfer(successfulFlowFile, REL_SUCCESS);
            session.getProvenanceReporter().send(successfulFlowFile, tranquilityController.getTransitUri());
        } else {
            session.remove(successfulFlowFile);
        }
        session.remove(flowFile);
    }
    session.commit();
}
Also used : MessageDroppedException(com.metamx.tranquility.tranquilizer.MessageDroppedException) OutputStream(java.io.OutputStream) RecordReader(org.apache.nifi.serialization.RecordReader) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) DruidTranquilityService(org.apache.nifi.controller.api.druid.DruidTranquilityService) Record(org.apache.nifi.serialization.record.Record) BoxedUnit(scala.runtime.BoxedUnit) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) ProcessException(org.apache.nifi.processor.exception.ProcessException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException) Map(java.util.Map)
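
The per-record conversion in processFlowFile relies on DataTypeUtils.convertRecordFieldtoObject, which recursively flattens a Record into plain Java objects (Map, List, String, numbers) that Tranquility can serialize. A minimal, hypothetical sketch of that step in isolation (the helper name is an assumption):

@SuppressWarnings("unchecked")
private Map<String, Object> recordToMap(final Record record) {
    // Describe the record's own schema as a RECORD data type, then let DataTypeUtils flatten it to a Map
    return (Map<String, Object>) DataTypeUtils.convertRecordFieldtoObject(
            record, RecordFieldType.RECORD.getRecordDataType(record.getSchema()));
}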

Example 3 with Record

Use of org.apache.nifi.serialization.record.Record in project nifi by apache.

From the class PutElasticsearchHttpRecord, the method writeValue:

@SuppressWarnings("unchecked")
private void writeValue(final JsonGenerator generator, final Object value, final String fieldName, final DataType dataType) throws IOException {
    if (value == null) {
        if (nullSuppression.equals(NEVER_SUPPRESS.getValue()) || ((nullSuppression.equals(SUPPRESS_MISSING.getValue())) && fieldName != null && !fieldName.equals(""))) {
            generator.writeNullField(fieldName);
        }
        return;
    }
    final DataType chosenDataType = dataType.getFieldType() == RecordFieldType.CHOICE ? DataTypeUtils.chooseDataType(value, (ChoiceDataType) dataType) : dataType;
    final Object coercedValue = DataTypeUtils.convertType(value, chosenDataType, fieldName);
    if (coercedValue == null) {
        generator.writeNull();
        return;
    }
    switch(chosenDataType.getFieldType()) {
        case DATE:
            {
                final String stringValue = DataTypeUtils.toString(coercedValue, () -> DataTypeUtils.getDateFormat(RecordFieldType.DATE.getDefaultFormat()));
                if (DataTypeUtils.isLongTypeCompatible(stringValue)) {
                    generator.writeNumber(DataTypeUtils.toLong(coercedValue, fieldName));
                } else {
                    generator.writeString(stringValue);
                }
                break;
            }
        case TIME:
            {
                final String stringValue = DataTypeUtils.toString(coercedValue, () -> DataTypeUtils.getDateFormat(RecordFieldType.TIME.getDefaultFormat()));
                if (DataTypeUtils.isLongTypeCompatible(stringValue)) {
                    generator.writeNumber(DataTypeUtils.toLong(coercedValue, fieldName));
                } else {
                    generator.writeString(stringValue);
                }
                break;
            }
        case TIMESTAMP:
            {
                final String stringValue = DataTypeUtils.toString(coercedValue, () -> DataTypeUtils.getDateFormat(RecordFieldType.TIMESTAMP.getDefaultFormat()));
                if (DataTypeUtils.isLongTypeCompatible(stringValue)) {
                    generator.writeNumber(DataTypeUtils.toLong(coercedValue, fieldName));
                } else {
                    generator.writeString(stringValue);
                }
                break;
            }
        case DOUBLE:
            generator.writeNumber(DataTypeUtils.toDouble(coercedValue, fieldName));
            break;
        case FLOAT:
            generator.writeNumber(DataTypeUtils.toFloat(coercedValue, fieldName));
            break;
        case LONG:
            generator.writeNumber(DataTypeUtils.toLong(coercedValue, fieldName));
            break;
        case INT:
        case BYTE:
        case SHORT:
            generator.writeNumber(DataTypeUtils.toInteger(coercedValue, fieldName));
            break;
        case CHAR:
        case STRING:
            generator.writeString(coercedValue.toString());
            break;
        case BIGINT:
            if (coercedValue instanceof Long) {
                generator.writeNumber((Long) coercedValue);
            } else {
                generator.writeNumber((BigInteger) coercedValue);
            }
            break;
        case BOOLEAN:
            final String stringValue = coercedValue.toString();
            if ("true".equalsIgnoreCase(stringValue)) {
                generator.writeBoolean(true);
            } else if ("false".equalsIgnoreCase(stringValue)) {
                generator.writeBoolean(false);
            } else {
                generator.writeString(stringValue);
            }
            break;
        case RECORD:
            {
                final Record record = (Record) coercedValue;
                final RecordDataType recordDataType = (RecordDataType) chosenDataType;
                final RecordSchema childSchema = recordDataType.getChildSchema();
                writeRecord(record, childSchema, generator);
                break;
            }
        case MAP:
            {
                final MapDataType mapDataType = (MapDataType) chosenDataType;
                final DataType valueDataType = mapDataType.getValueType();
                final Map<String, ?> map = (Map<String, ?>) coercedValue;
                generator.writeStartObject();
                for (final Map.Entry<String, ?> entry : map.entrySet()) {
                    final String mapKey = entry.getKey();
                    final Object mapValue = entry.getValue();
                    generator.writeFieldName(mapKey);
                    writeValue(generator, mapValue, fieldName + "." + mapKey, valueDataType);
                }
                generator.writeEndObject();
                break;
            }
        case ARRAY:
        default:
            if (coercedValue instanceof Object[]) {
                final Object[] values = (Object[]) coercedValue;
                final ArrayDataType arrayDataType = (ArrayDataType) dataType;
                final DataType elementType = arrayDataType.getElementType();
                writeArray(values, fieldName, generator, elementType);
            } else {
                generator.writeString(coercedValue.toString());
            }
            break;
    }
}
Also used : MapDataType(org.apache.nifi.serialization.record.type.MapDataType) RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) DataType(org.apache.nifi.serialization.record.DataType) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) MapDataType(org.apache.nifi.serialization.record.type.MapDataType) ArrayDataType(org.apache.nifi.serialization.record.type.ArrayDataType) RecordDataType(org.apache.nifi.serialization.record.type.RecordDataType) Record(org.apache.nifi.serialization.record.Record) ChoiceDataType(org.apache.nifi.serialization.record.type.ChoiceDataType) ArrayDataType(org.apache.nifi.serialization.record.type.ArrayDataType) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) Map(java.util.Map)
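
writeValue is driven by a Jackson JsonGenerator: each field is coerced with DataTypeUtils and then emitted with the matching generator call. A minimal, hypothetical sketch of invoking it for a single record field (the helper name is an assumption):

private String fieldToJson(final Record record, final RecordField field) throws IOException {
    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (final JsonGenerator generator = new JsonFactory().createGenerator(out, JsonEncoding.UTF8)) {
        generator.writeStartObject();
        generator.writeFieldName(field.getFieldName());
        // Coercion (dates, choices, nested records, maps, arrays) is handled inside writeValue above
        writeValue(generator, record.getValue(field), field.getFieldName(), field.getDataType());
        generator.writeEndObject();
    }
    return out.toString(StandardCharsets.UTF_8.name());
}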

Example 4 with Record

Use of org.apache.nifi.serialization.record.Record in project nifi by apache.

From the class ConsumerLease, the method writeData:

private void writeData(final ProcessSession session, ConsumerRecord<byte[], byte[]> record, final TopicPartition topicPartition) {
    FlowFile flowFile = session.create();
    final BundleTracker tracker = new BundleTracker(record, topicPartition, keyEncoding);
    tracker.incrementRecordCount(1);
    final byte[] value = record.value();
    if (value != null) {
        flowFile = session.write(flowFile, out -> {
            out.write(value);
        });
    }
    flowFile = session.putAllAttributes(flowFile, getAttributes(record));
    tracker.updateFlowFile(flowFile);
    populateAttributes(tracker);
    session.transfer(tracker.flowFile, REL_SUCCESS);
}
Also used : KafkaException(org.apache.kafka.common.KafkaException) HashMap(java.util.HashMap) ConsumerRecords(org.apache.kafka.clients.consumer.ConsumerRecords) ComponentLog(org.apache.nifi.logging.ComponentLog) ProcessException(org.apache.nifi.processor.exception.ProcessException) ArrayList(java.util.ArrayList) UTF8_ENCODING(org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.UTF8_ENCODING) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) ByteArrayInputStream(java.io.ByteArrayInputStream) Charset(java.nio.charset.Charset) RecordReader(org.apache.nifi.serialization.RecordReader) Map(java.util.Map) Record(org.apache.nifi.serialization.record.Record) OutputStream(java.io.OutputStream) Consumer(org.apache.kafka.clients.consumer.Consumer) TopicPartition(org.apache.kafka.common.TopicPartition) HEX_ENCODING(org.apache.nifi.processors.kafka.pubsub.KafkaProcessorUtils.HEX_ENCODING) FlowFile(org.apache.nifi.flowfile.FlowFile) Collection(java.util.Collection) WriteResult(org.apache.nifi.serialization.WriteResult) REL_PARSE_FAILURE(org.apache.nifi.processors.kafka.pubsub.ConsumeKafkaRecord_1_0.REL_PARSE_FAILURE) IOException(java.io.IOException) ProcessSession(org.apache.nifi.processor.ProcessSession) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) Objects(java.util.Objects) TimeUnit(java.util.concurrent.TimeUnit) ConsumerRebalanceListener(org.apache.kafka.clients.consumer.ConsumerRebalanceListener) List(java.util.List) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) REL_SUCCESS(org.apache.nifi.processors.kafka.pubsub.ConsumeKafkaRecord_1_0.REL_SUCCESS) Header(org.apache.kafka.common.header.Header) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) Closeable(java.io.Closeable) OffsetAndMetadata(org.apache.kafka.clients.consumer.OffsetAndMetadata) Pattern(java.util.regex.Pattern) CoreAttributes(org.apache.nifi.flowfile.attributes.CoreAttributes) DatatypeConverter(javax.xml.bind.DatatypeConverter) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) InputStream(java.io.InputStream) KafkaConsumer(org.apache.kafka.clients.consumer.KafkaConsumer)

Example 5 with Record

Use of org.apache.nifi.serialization.record.Record in project nifi by apache.

From the class MockRecordWriter, the method createWriter:

@Override
public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out) {
    return new RecordSetWriter() {

        @Override
        public void flush() throws IOException {
            out.flush();
        }

        @Override
        public WriteResult write(final RecordSet rs) throws IOException {
            out.write(header.getBytes());
            out.write("\n".getBytes());
            int recordCount = 0;
            final int numCols = rs.getSchema().getFieldCount();
            Record record = null;
            while ((record = rs.next()) != null) {
                if (++recordCount > failAfterN && failAfterN > -1) {
                    throw new IOException("Unit Test intentionally throwing IOException after " + failAfterN + " records were written");
                }
                int i = 0;
                for (final String fieldName : record.getSchema().getFieldNames()) {
                    final String val = record.getAsString(fieldName);
                    if (quoteValues) {
                        out.write("\"".getBytes());
                        if (val != null) {
                            out.write(val.getBytes());
                        }
                        out.write("\"".getBytes());
                    } else if (val != null) {
                        out.write(val.getBytes());
                    }
                    if (i++ < numCols - 1) {
                        out.write(",".getBytes());
                    }
                }
                out.write("\n".getBytes());
            }
            return WriteResult.of(recordCount, Collections.emptyMap());
        }

        @Override
        public String getMimeType() {
            return "text/plain";
        }

        @Override
        public WriteResult write(Record record) throws IOException {
            return WriteResult.of(1, Collections.emptyMap());
        }

        @Override
        public void close() throws IOException {
        }

        @Override
        public void beginRecordSet() throws IOException {
        }

        @Override
        public WriteResult finishRecordSet() throws IOException {
            return null;
        }
    };
}
Also used : Record(org.apache.nifi.serialization.record.Record) RecordSet(org.apache.nifi.serialization.record.RecordSet) IOException(java.io.IOException) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter)
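
A minimal, hypothetical usage sketch of the factory above, assuming a MockRecordWriter instance configured with a header and quoted values; the RecordSet here is a throwaway implementation backed by a single-record list:

private String writeWithMockWriter(final MockRecordWriter writerFactory) throws IOException {
    final RecordSchema schema = new SimpleRecordSchema(Arrays.asList(
            new RecordField("name", RecordFieldType.STRING.getDataType()),
            new RecordField("age", RecordFieldType.INT.getDataType())));
    final Map<String, Object> values = new HashMap<>();
    values.put("name", "alice");
    values.put("age", 30);
    final Iterator<Record> itr = Collections.<Record>singletonList(new MapRecord(schema, values)).iterator();
    // Trivial RecordSet over the list: next() returns null once the records are exhausted
    final RecordSet recordSet = new RecordSet() {
        @Override
        public RecordSchema getSchema() {
            return schema;
        }

        @Override
        public Record next() {
            return itr.hasNext() ? itr.next() : null;
        }
    };
    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    final RecordSetWriter writer = writerFactory.createWriter(null, schema, out);
    // Writes the header line followed by one (optionally quoted) CSV-style line per record
    final WriteResult result = writer.write(recordSet);
    return result.getRecordCount() + " records: " + out.toString(StandardCharsets.UTF_8.name());
}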

Aggregations

Record (org.apache.nifi.serialization.record.Record) 168
RecordSchema (org.apache.nifi.serialization.record.RecordSchema) 137
Test (org.junit.Test) 116
SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema) 115
MapRecord (org.apache.nifi.serialization.record.MapRecord) 98
RecordField (org.apache.nifi.serialization.record.RecordField) 95
ArrayList (java.util.ArrayList) 87
HashMap (java.util.HashMap) 77
InputStream (java.io.InputStream) 53
ByteArrayInputStream (java.io.ByteArrayInputStream) 42
ComponentLog (org.apache.nifi.logging.ComponentLog) 42
FileInputStream (java.io.FileInputStream) 31
LinkedHashMap (java.util.LinkedHashMap) 29
IOException (java.io.IOException) 28
ByteArrayOutputStream (java.io.ByteArrayOutputStream) 24
Map (java.util.Map) 24
RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter) 21
FlowFile (org.apache.nifi.flowfile.FlowFile) 20
RecordReader (org.apache.nifi.serialization.RecordReader) 18
OutputStream (java.io.OutputStream) 17