Example 16 with RecordSetWriter

Use of org.apache.nifi.serialization.RecordSetWriter in the Apache NiFi project.

Class PublisherLease, method publish:

void publish(final FlowFile flowFile, final RecordSet recordSet, final RecordSetWriterFactory writerFactory, final RecordSchema schema, final String messageKeyField, final String topic) throws IOException {
    if (tracker == null) {
        tracker = new InFlightMessageTracker(logger);
    }
    final ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
    Record record;
    int recordCount = 0;
    try {
        while ((record = recordSet.next()) != null) {
            recordCount++;
            baos.reset();
            Map<String, String> additionalAttributes = Collections.emptyMap();
            try (final RecordSetWriter writer = writerFactory.createWriter(logger, schema, baos)) {
                final WriteResult writeResult = writer.write(record);
                additionalAttributes = writeResult.getAttributes();
                writer.flush();
            }
            final byte[] messageContent = baos.toByteArray();
            final String key = messageKeyField == null ? null : record.getAsString(messageKeyField);
            final byte[] messageKey = (key == null) ? null : key.getBytes(StandardCharsets.UTF_8);
            publish(flowFile, additionalAttributes, messageKey, messageContent, topic, tracker);
            if (tracker.isFailed(flowFile)) {
                // If we have a failure, don't try to send anything else.
                return;
            }
        }
        if (recordCount == 0) {
            tracker.trackEmpty(flowFile);
        }
    } catch (final TokenTooLargeException ttle) {
        tracker.fail(flowFile, ttle);
    } catch (final SchemaNotFoundException snfe) {
        throw new IOException(snfe);
    } catch (final Exception e) {
        tracker.fail(flowFile, e);
        poison();
        throw e;
    }
}
Also used : ByteArrayOutputStream(java.io.ByteArrayOutputStream), IOException(java.io.IOException), RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter), TimeoutException(java.util.concurrent.TimeoutException), SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException), TokenTooLargeException(org.apache.nifi.stream.io.exception.TokenTooLargeException), WriteResult(org.apache.nifi.serialization.WriteResult), ProducerRecord(org.apache.kafka.clients.producer.ProducerRecord), Record(org.apache.nifi.serialization.record.Record)
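For context, here is a minimal caller sketch, not the actual PublishKafkaRecord processor code: it assumes a configured RecordReaderFactory, an already-acquired PublisherLease, and placeholder names (publishFlowFile, lease, topic, messageKeyField), and shows how a processor might feed a FlowFile's records into publish.

import java.io.IOException;
import java.io.InputStream;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.schema.access.SchemaNotFoundException;
import org.apache.nifi.serialization.MalformedRecordException;
import org.apache.nifi.serialization.RecordReader;
import org.apache.nifi.serialization.RecordReaderFactory;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.RecordSet;

// Hypothetical helper: stream one FlowFile's records into the lease.
void publishFlowFile(final ProcessSession session, final FlowFile flowFile, final ComponentLog logger,
        final RecordReaderFactory readerFactory, final RecordSetWriterFactory writerFactory,
        final PublisherLease lease, final String topic, final String messageKeyField) {
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            try (final RecordReader reader = readerFactory.createRecordReader(flowFile.getAttributes(), in, logger)) {
                final RecordSet recordSet = reader.createRecordSet();
                // Let the configured writer service resolve the schema that will be used for serialization.
                final RecordSchema schema = writerFactory.getSchema(flowFile.getAttributes(), recordSet.getSchema());
                lease.publish(flowFile, recordSet, writerFactory, schema, messageKeyField, topic);
            } catch (final SchemaNotFoundException | MalformedRecordException e) {
                throw new IOException(e);
            }
        }
    });
}

Note how publish above reuses a single ByteArrayOutputStream, resetting it per record, so each Kafka message carries exactly one serialized record without re-allocating the buffer.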

Example 17 with RecordSetWriter

Use of org.apache.nifi.serialization.RecordSetWriter in the Apache NiFi project.

Class ConsumerLease, method writeRecordData:

private void writeRecordData(final ProcessSession session, final List<ConsumerRecord<byte[], byte[]>> records, final TopicPartition topicPartition) {
    // Each Kafka message is parsed with a RecordReader, and the resulting records are grouped into
    // "bundles" keyed by topic/partition, schema, and the message attributes. A bundle lazily creates
    // its FlowFile and RecordSetWriter the first time a record for it is seen, so records that share a
    // schema are appended to the same FlowFile instead of producing one FlowFile per Kafka message.
    RecordSetWriter writer = null;
    try {
        for (final ConsumerRecord<byte[], byte[]> consumerRecord : records) {
            final Map<String, String> attributes = getAttributes(consumerRecord);
            final byte[] recordBytes = consumerRecord.value() == null ? new byte[0] : consumerRecord.value();
            try (final InputStream in = new ByteArrayInputStream(recordBytes)) {
                final RecordReader reader;
                try {
                    reader = readerFactory.createRecordReader(attributes, in, logger);
                } catch (final IOException e) {
                    yield();
                    rollback(topicPartition);
                    handleParseFailure(consumerRecord, session, e, "Failed to parse message from Kafka due to comms failure. Will roll back session and try again momentarily.");
                    closeWriter(writer);
                    return;
                } catch (final Exception e) {
                    handleParseFailure(consumerRecord, session, e);
                    continue;
                }
                Record record;
                while ((record = reader.nextRecord()) != null) {
                    // Determine the bundle for this record.
                    final RecordSchema recordSchema = record.getSchema();
                    final BundleInformation bundleInfo = new BundleInformation(topicPartition, recordSchema, attributes);
                    BundleTracker tracker = bundleMap.get(bundleInfo);
                    if (tracker == null) {
                        FlowFile flowFile = session.create();
                        flowFile = session.putAllAttributes(flowFile, attributes);
                        final OutputStream rawOut = session.write(flowFile);
                        final RecordSchema writeSchema;
                        try {
                            writeSchema = writerFactory.getSchema(flowFile.getAttributes(), recordSchema);
                        } catch (final Exception e) {
                            logger.error("Failed to obtain Schema for FlowFile. Will roll back the Kafka message offsets.", e);
                            rollback(topicPartition);
                            yield();
                            throw new ProcessException(e);
                        }
                        writer = writerFactory.createWriter(logger, writeSchema, rawOut);
                        writer.beginRecordSet();
                        tracker = new BundleTracker(consumerRecord, topicPartition, keyEncoding, writer);
                        tracker.updateFlowFile(flowFile);
                        bundleMap.put(bundleInfo, tracker);
                    } else {
                        writer = tracker.recordWriter;
                    }
                    try {
                        writer.write(record);
                    } catch (final RuntimeException re) {
                        handleParseFailure(consumerRecord, session, re, "Failed to write message from Kafka using the configured Record Writer. " + "Will route message as its own FlowFile to the 'parse.failure' relationship");
                        continue;
                    }
                    tracker.incrementRecordCount(1L);
                    session.adjustCounter("Records Received", 1L, false);
                }
            }
        }
    } catch (final Exception e) {
        logger.error("Failed to properly receive messages from Kafka. Will roll back session and any un-committed offsets from Kafka.", e);
        closeWriter(writer);
        rollback(topicPartition);
        throw new ProcessException(e);
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile), ByteArrayInputStream(java.io.ByteArrayInputStream), InputStream(java.io.InputStream), RecordReader(org.apache.nifi.serialization.RecordReader), OutputStream(java.io.OutputStream), IOException(java.io.IOException), RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter), KafkaException(org.apache.kafka.common.KafkaException), ProcessException(org.apache.nifi.processor.exception.ProcessException), Record(org.apache.nifi.serialization.record.Record), ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord), RecordSchema(org.apache.nifi.serialization.record.RecordSchema)
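What the snippet leaves open is how a bundle is eventually finished. Below is a minimal sketch of that completion step under the conventions visible above (beginRecordSet() was called when the writer was created, and the writer wraps the stream returned by session.write()); the method name completeBundle is a placeholder, not the actual ConsumerLease API.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.serialization.RecordSetWriter;
import org.apache.nifi.serialization.WriteResult;

// Hypothetical completion step for one bundle's FlowFile and writer.
private FlowFile completeBundle(final ProcessSession session, final FlowFile flowFile,
        final RecordSetWriter writer) throws IOException {
    final WriteResult writeResult = writer.finishRecordSet();   // closes out the record set started by beginRecordSet()
    writer.close();                                             // also closes the stream obtained from session.write()

    final Map<String, String> attributes = new HashMap<>(writeResult.getAttributes());
    attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
    attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
    return session.putAllAttributes(flowFile, attributes);
}

finishRecordSet() pairs with the beginRecordSet() call made when the bundle was opened; once the writer and its stream are closed, the record count and MIME type can be attached as FlowFile attributes.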

Example 18 with RecordSetWriter

Use of org.apache.nifi.serialization.RecordSetWriter in the Apache NiFi project.

Class QueryRecord, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }
    final StopWatch stopWatch = new StopWatch(true);
    final RecordSetWriterFactory recordSetWriterFactory = context.getProperty(RECORD_WRITER_FACTORY).asControllerService(RecordSetWriterFactory.class);
    final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);
    final Map<FlowFile, Relationship> transformedFlowFiles = new HashMap<>();
    final Set<FlowFile> createdFlowFiles = new HashSet<>();
    // Determine the Record Reader's schema
    final RecordSchema readerSchema;
    try (final InputStream rawIn = session.read(original)) {
        final Map<String, String> originalAttributes = original.getAttributes();
        final RecordReader reader = recordReaderFactory.createRecordReader(originalAttributes, rawIn, getLogger());
        final RecordSchema inputSchema = reader.getSchema();
        readerSchema = recordSetWriterFactory.getSchema(originalAttributes, inputSchema);
    } catch (final Exception e) {
        getLogger().error("Failed to determine Record Schema from {}; routing to failure", new Object[] { original, e });
        session.transfer(original, REL_FAILURE);
        return;
    }
    // Determine the schema for writing the data
    final Map<String, String> originalAttributes = original.getAttributes();
    int recordsRead = 0;
    try {
        for (final PropertyDescriptor descriptor : context.getProperties().keySet()) {
            if (!descriptor.isDynamic()) {
                continue;
            }
            final Relationship relationship = new Relationship.Builder().name(descriptor.getName()).build();
            // We have to fork a child because we may need to read the input FlowFile more than once,
            // and we cannot call session.read() on the original FlowFile while we are within a write
            // callback for the original FlowFile.
            FlowFile transformed = session.create(original);
            boolean flowFileRemoved = false;
            try {
                final String sql = context.getProperty(descriptor).evaluateAttributeExpressions(original).getValue();
                final AtomicReference<WriteResult> writeResultRef = new AtomicReference<>();
                final QueryResult queryResult;
                if (context.getProperty(CACHE_SCHEMA).asBoolean()) {
                    queryResult = queryWithCache(session, original, sql, context, recordReaderFactory);
                } else {
                    queryResult = query(session, original, sql, context, recordReaderFactory);
                }
                final AtomicReference<String> mimeTypeRef = new AtomicReference<>();
                try {
                    final ResultSet rs = queryResult.getResultSet();
                    transformed = session.write(transformed, new OutputStreamCallback() {

                        @Override
                        public void process(final OutputStream out) throws IOException {
                            final ResultSetRecordSet recordSet;
                            final RecordSchema writeSchema;
                            try {
                                recordSet = new ResultSetRecordSet(rs, readerSchema);
                                final RecordSchema resultSetSchema = recordSet.getSchema();
                                writeSchema = recordSetWriterFactory.getSchema(originalAttributes, resultSetSchema);
                            } catch (final SQLException | SchemaNotFoundException e) {
                                throw new ProcessException(e);
                            }
                            try (final RecordSetWriter resultSetWriter = recordSetWriterFactory.createWriter(getLogger(), writeSchema, out)) {
                                writeResultRef.set(resultSetWriter.write(recordSet));
                                mimeTypeRef.set(resultSetWriter.getMimeType());
                            } catch (final Exception e) {
                                throw new IOException(e);
                            }
                        }
                    });
                } finally {
                    closeQuietly(queryResult);
                }
                recordsRead = Math.max(recordsRead, queryResult.getRecordsRead());
                final WriteResult result = writeResultRef.get();
                if (result.getRecordCount() == 0 && !context.getProperty(INCLUDE_ZERO_RECORD_FLOWFILES).asBoolean()) {
                    session.remove(transformed);
                    flowFileRemoved = true;
                    transformedFlowFiles.remove(transformed);
                    getLogger().info("Transformed {} but the result contained no data so will not pass on a FlowFile", new Object[] { original });
                } else {
                    final Map<String, String> attributesToAdd = new HashMap<>();
                    if (result.getAttributes() != null) {
                        attributesToAdd.putAll(result.getAttributes());
                    }
                    attributesToAdd.put(CoreAttributes.MIME_TYPE.key(), mimeTypeRef.get());
                    attributesToAdd.put("record.count", String.valueOf(result.getRecordCount()));
                    transformed = session.putAllAttributes(transformed, attributesToAdd);
                    transformedFlowFiles.put(transformed, relationship);
                    session.adjustCounter("Records Written", result.getRecordCount(), false);
                }
            } finally {
                // Ensure that we have the FlowFile in the set in case we throw any Exception
                if (!flowFileRemoved) {
                    createdFlowFiles.add(transformed);
                }
            }
        }
        final long elapsedMillis = stopWatch.getElapsed(TimeUnit.MILLISECONDS);
        if (transformedFlowFiles.size() > 0) {
            session.getProvenanceReporter().fork(original, transformedFlowFiles.keySet(), elapsedMillis);
            for (final Map.Entry<FlowFile, Relationship> entry : transformedFlowFiles.entrySet()) {
                final FlowFile transformed = entry.getKey();
                final Relationship relationship = entry.getValue();
                session.getProvenanceReporter().route(transformed, relationship);
                session.transfer(transformed, relationship);
            }
        }
        getLogger().info("Successfully queried {} in {} millis", new Object[] { original, elapsedMillis });
        session.transfer(original, REL_ORIGINAL);
    } catch (final SQLException e) {
        getLogger().error("Unable to query {} due to {}", new Object[] { original, e.getCause() == null ? e : e.getCause() });
        session.remove(createdFlowFiles);
        session.transfer(original, REL_FAILURE);
    } catch (final Exception e) {
        getLogger().error("Unable to query {} due to {}", new Object[] { original, e });
        session.remove(createdFlowFiles);
        session.transfer(original, REL_FAILURE);
    }
    session.adjustCounter("Records Read", recordsRead, false);
}
Also used : HashMap(java.util.HashMap), SQLException(java.sql.SQLException), RecordReader(org.apache.nifi.serialization.RecordReader), OutputStream(java.io.OutputStream), RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter), RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory), ResultSet(java.sql.ResultSet), OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback), RecordSchema(org.apache.nifi.serialization.record.RecordSchema), HashSet(java.util.HashSet), FlowFile(org.apache.nifi.flowfile.FlowFile), PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor), InputStream(java.io.InputStream), AtomicReference(java.util.concurrent.atomic.AtomicReference), IOException(java.io.IOException), ResultSetRecordSet(org.apache.nifi.serialization.record.ResultSetRecordSet), SchemaNotFoundException(org.apache.nifi.schema.access.SchemaNotFoundException), ProcessException(org.apache.nifi.processor.exception.ProcessException), StopWatch(org.apache.nifi.util.StopWatch), RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory), WriteResult(org.apache.nifi.serialization.WriteResult), Relationship(org.apache.nifi.processor.Relationship), DynamicRelationship(org.apache.nifi.annotation.behavior.DynamicRelationship), Map(java.util.Map)
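The core of the write callback can be distilled into a small helper. The following is a hedged sketch of the same pattern (wrapping a JDBC ResultSet in a ResultSetRecordSet and streaming it through a RecordSetWriter), not a drop-in replacement for QueryRecord's callback; writeResultSet and its parameters are illustrative names.

import java.io.IOException;
import java.io.OutputStream;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Map;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.schema.access.SchemaNotFoundException;
import org.apache.nifi.serialization.RecordSetWriter;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.WriteResult;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.ResultSetRecordSet;

// Hypothetical helper: serialize a JDBC ResultSet with a configured writer factory.
WriteResult writeResultSet(final ResultSet rs, final RecordSchema readerSchema,
        final Map<String, String> attributes, final RecordSetWriterFactory writerFactory,
        final OutputStream out, final ComponentLog logger)
        throws IOException, SQLException, SchemaNotFoundException {
    // Adapt the ResultSet to NiFi's RecordSet abstraction, using the reader schema for type hints.
    final ResultSetRecordSet recordSet = new ResultSetRecordSet(rs, readerSchema);
    final RecordSchema writeSchema = writerFactory.getSchema(attributes, recordSet.getSchema());
    try (final RecordSetWriter writer = writerFactory.createWriter(logger, writeSchema, out)) {
        return writer.write(recordSet);   // drains the ResultSet row by row and returns count/attributes
    }
}

This is the same negotiation QueryRecord performs inside the OutputStreamCallback: the writer factory decides the output schema, and write(recordSet) streams the rows through it.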

Example 19 with RecordSetWriter

Use of org.apache.nifi.serialization.RecordSetWriter in the Apache NiFi project.

Class ValidateRecord, method createIfNecessary:

private RecordSetWriter createIfNecessary(final RecordSetWriter writer, final RecordSetWriterFactory factory, final ProcessSession session, final FlowFile flowFile, final RecordSchema inputSchema) throws SchemaNotFoundException, IOException {
    if (writer != null) {
        return writer;
    }
    final OutputStream out = session.write(flowFile);
    final RecordSetWriter created = factory.createWriter(getLogger(), inputSchema, out);
    created.beginRecordSet();
    return created;
}
Also used : OutputStream(java.io.OutputStream) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter)
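A hedged sketch of how such a lazily-created writer is typically used: a branch's writer is only opened once the first record for that branch arrives, so a FlowFile that would otherwise be empty never gets a writer at all. The Predicate stand-in and the single shared factory are simplifications for illustration, not ValidateRecord's actual validation logic.

import java.io.IOException;
import java.util.function.Predicate;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.schema.access.SchemaNotFoundException;
import org.apache.nifi.serialization.MalformedRecordException;
import org.apache.nifi.serialization.RecordReader;
import org.apache.nifi.serialization.RecordSetWriter;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordSchema;

// Hypothetical routing loop: writers are only opened for branches that actually receive records.
private void routeRecords(final RecordReader reader, final Predicate<Record> isValid,
        final RecordSetWriterFactory factory, final ProcessSession session,
        final FlowFile validFlowFile, final FlowFile invalidFlowFile, final RecordSchema schema)
        throws IOException, SchemaNotFoundException, MalformedRecordException {
    RecordSetWriter validWriter = null;
    RecordSetWriter invalidWriter = null;
    Record record;
    while ((record = reader.nextRecord()) != null) {
        if (isValid.test(record)) {
            validWriter = createIfNecessary(validWriter, factory, session, validFlowFile, schema);
            validWriter.write(record);
        } else {
            invalidWriter = createIfNecessary(invalidWriter, factory, session, invalidFlowFile, schema);
            invalidWriter.write(record);
        }
    }
    // The caller would finishRecordSet()/close() whichever writers were opened and drop the empty branch.
}

Pairing this with a matching finish-and-close step, and removing any FlowFile whose writer was never opened, is what keeps empty branches from being transferred.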

Example 20 with RecordSetWriter

Use of org.apache.nifi.serialization.RecordSetWriter in the Apache NiFi project.

Class TestWriteAvroResult, method testDataTypes:

@Test
public void testDataTypes() throws IOException {
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/avro/datatypes.avsc"));
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final List<RecordField> subRecordFields = Collections.singletonList(new RecordField("field1", RecordFieldType.STRING.getDataType()));
    final RecordSchema subRecordSchema = new SimpleRecordSchema(subRecordFields);
    final DataType subRecordDataType = RecordFieldType.RECORD.getRecordDataType(subRecordSchema);
    final List<RecordField> fields = new ArrayList<>();
    fields.add(new RecordField("string", RecordFieldType.STRING.getDataType()));
    fields.add(new RecordField("int", RecordFieldType.INT.getDataType()));
    fields.add(new RecordField("long", RecordFieldType.LONG.getDataType()));
    fields.add(new RecordField("double", RecordFieldType.DOUBLE.getDataType()));
    fields.add(new RecordField("float", RecordFieldType.FLOAT.getDataType()));
    fields.add(new RecordField("boolean", RecordFieldType.BOOLEAN.getDataType()));
    fields.add(new RecordField("bytes", RecordFieldType.ARRAY.getArrayDataType(RecordFieldType.BYTE.getDataType())));
    fields.add(new RecordField("nullOrLong", RecordFieldType.LONG.getDataType()));
    fields.add(new RecordField("array", RecordFieldType.ARRAY.getArrayDataType(RecordFieldType.INT.getDataType())));
    fields.add(new RecordField("record", subRecordDataType));
    fields.add(new RecordField("map", RecordFieldType.MAP.getMapDataType(subRecordDataType)));
    final RecordSchema recordSchema = new SimpleRecordSchema(fields);
    final Record innerRecord = new MapRecord(subRecordSchema, Collections.singletonMap("field1", "hello"));
    final Map<String, Object> innerMap = new HashMap<>();
    innerMap.put("key1", innerRecord);
    final Map<String, Object> values = new HashMap<>();
    values.put("string", "hello");
    values.put("int", 8);
    values.put("long", 42L);
    values.put("double", 3.14159D);
    values.put("float", 1.23456F);
    values.put("boolean", true);
    values.put("bytes", AvroTypeUtil.convertByteArray("hello".getBytes()));
    values.put("nullOrLong", null);
    values.put("array", new Integer[] { 1, 2, 3 });
    values.put("record", innerRecord);
    values.put("map", innerMap);
    final Record record = new MapRecord(recordSchema, values);
    final WriteResult writeResult;
    try (final RecordSetWriter writer = createWriter(schema, baos)) {
        writeResult = writer.write(RecordSet.of(record.getSchema(), record));
    }
    verify(writeResult);
    final byte[] data = baos.toByteArray();
    try (final InputStream in = new ByteArrayInputStream(data)) {
        final GenericRecord avroRecord = readRecord(in, schema);
        assertMatch(record, avroRecord);
    }
}
Also used : SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema), MapRecord(org.apache.nifi.serialization.record.MapRecord), RecordField(org.apache.nifi.serialization.record.RecordField), HashMap(java.util.HashMap), ByteArrayInputStream(java.io.ByteArrayInputStream), InputStream(java.io.InputStream), RecordSchema(org.apache.nifi.serialization.record.RecordSchema), Schema(org.apache.avro.Schema), ArrayList(java.util.ArrayList), ByteArrayOutputStream(java.io.ByteArrayOutputStream), RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter), WriteResult(org.apache.nifi.serialization.WriteResult), DataType(org.apache.nifi.serialization.record.DataType), Record(org.apache.nifi.serialization.record.Record), GenericRecord(org.apache.avro.generic.GenericRecord), File(java.io.File), Test(org.junit.Test)
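createWriter and readRecord are abstract in this test and implemented by subclasses. As an illustration only, a readRecord for a writer that embeds the schema (the Avro data file container format) might look like the following; this is an assumption about the round-trip format, not necessarily the project's actual helper.

import java.io.IOException;
import java.io.InputStream;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

// Assumes the writer produced an Avro data file (schema embedded in the container header).
private GenericRecord readRecord(final InputStream in, final Schema schema) throws IOException {
    try (final DataFileStream<GenericRecord> dataFileStream =
            new DataFileStream<>(in, new GenericDatumReader<GenericRecord>(schema))) {
        return dataFileStream.next();   // the test writes exactly one record
    }
}

For a variant that writes a bare Avro datum without the container header, the reader would instead need the schema supplied externally (for example via a BinaryDecoder and GenericDatumReader).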

Aggregations

RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter): 21
Record (org.apache.nifi.serialization.record.Record): 17
IOException (java.io.IOException): 16
RecordSchema (org.apache.nifi.serialization.record.RecordSchema): 16
OutputStream (java.io.OutputStream): 14
FlowFile (org.apache.nifi.flowfile.FlowFile): 14
RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory): 14
HashMap (java.util.HashMap): 13
InputStream (java.io.InputStream): 11
ProcessException (org.apache.nifi.processor.exception.ProcessException): 11
RecordReader (org.apache.nifi.serialization.RecordReader): 11
WriteResult (org.apache.nifi.serialization.WriteResult): 10
RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory): 8
Map (java.util.Map): 7
SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException): 7
ArrayList (java.util.ArrayList): 6
ByteArrayInputStream (java.io.ByteArrayInputStream): 5
HashSet (java.util.HashSet): 4
MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException): 4
RecordSet (org.apache.nifi.serialization.record.RecordSet): 4