Example 16 with RecordReader

Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.

From class ListenUDPRecord, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final int maxBatchSize = context.getProperty(BATCH_SIZE).asInteger();
    final Map<String, FlowFileRecordWriter> flowFileRecordWriters = new HashMap<>();
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    for (int i = 0; i < maxBatchSize; i++) {
        // this processor isn't leveraging the error queue so don't bother polling to avoid the overhead
        // if the error handling is ever changed to use the error queue then this flag needs to be changed as well
        final StandardEvent event = getMessage(true, false, session);
        // break out if we don't have any messages, don't yield since we already do a long poll inside getMessage
        if (event == null) {
            break;
        }
        // attempt to read all of the records from the current datagram into a list in memory so that we can ensure the
        // entire datagram can be read as records, and if not transfer the whole thing to parse.failure
        final RecordReader reader;
        final List<Record> records = new ArrayList<>();
        try (final InputStream in = new ByteArrayInputStream(event.getData())) {
            reader = readerFactory.createRecordReader(Collections.emptyMap(), in, getLogger());
            Record record;
            while ((record = reader.nextRecord()) != null) {
                records.add(record);
            }
        } catch (final Exception e) {
            handleParseFailure(event, session, e);
            continue;
        }
        if (records.isEmpty()) {
            handleParseFailure(event, session, null);
            continue;
        }
        // see if we already started a flow file and writer for the given sender
        // if an exception happens creating the flow file or writer, put the event in the error queue to try it again later
        FlowFileRecordWriter flowFileRecordWriter = flowFileRecordWriters.get(event.getSender());
        if (flowFileRecordWriter == null) {
            FlowFile flowFile = null;
            OutputStream rawOut = null;
            RecordSetWriter writer = null;
            try {
                flowFile = session.create();
                rawOut = session.write(flowFile);
                final Record firstRecord = records.get(0);
                final RecordSchema recordSchema = firstRecord.getSchema();
                final RecordSchema writeSchema = writerFactory.getSchema(Collections.emptyMap(), recordSchema);
                writer = writerFactory.createWriter(getLogger(), writeSchema, rawOut);
                writer.beginRecordSet();
                flowFileRecordWriter = new FlowFileRecordWriter(flowFile, writer);
                flowFileRecordWriters.put(event.getSender(), flowFileRecordWriter);
            } catch (final Exception ex) {
                getLogger().error("Failed to properly initialize record writer. Datagram will be queued for re-processing.", ex);
                try {
                    if (writer != null) {
                        writer.close();
                    }
                } catch (final Exception e) {
                    getLogger().warn("Failed to close Record Writer", e);
                }
                if (rawOut != null) {
                    IOUtils.closeQuietly(rawOut);
                }
                if (flowFile != null) {
                    session.remove(flowFile);
                }
                context.yield();
                break;
            }
        }
        // attempt to write each record, if any record fails then remove the flow file and break out of the loop
        final RecordSetWriter writer = flowFileRecordWriter.getRecordWriter();
        try {
            for (final Record record : records) {
                writer.write(record);
            }
        } catch (Exception e) {
            getLogger().error("Failed to write records due to: " + e.getMessage(), e);
            IOUtils.closeQuietly(writer);
            session.remove(flowFileRecordWriter.getFlowFile());
            flowFileRecordWriters.remove(event.getSender());
            break;
        }
    }
    for (final Map.Entry<String, FlowFileRecordWriter> entry : flowFileRecordWriters.entrySet()) {
        final String sender = entry.getKey();
        final FlowFileRecordWriter flowFileRecordWriter = entry.getValue();
        final RecordSetWriter writer = flowFileRecordWriter.getRecordWriter();
        FlowFile flowFile = flowFileRecordWriter.getFlowFile();
        try {
            final WriteResult writeResult;
            try {
                writeResult = writer.finishRecordSet();
            } finally {
                writer.close();
            }
            if (writeResult.getRecordCount() == 0) {
                session.remove(flowFile);
                continue;
            }
            final Map<String, String> attributes = new HashMap<>();
            attributes.putAll(getAttributes(sender));
            attributes.putAll(writeResult.getAttributes());
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
            attributes.put(RECORD_COUNT_ATTR, String.valueOf(writeResult.getRecordCount()));
            flowFile = session.putAllAttributes(flowFile, attributes);
            session.transfer(flowFile, REL_SUCCESS);
            final String transitUri = getTransitUri(sender);
            session.getProvenanceReporter().receive(flowFile, transitUri);
        } catch (final Exception e) {
            getLogger().error("Unable to properly complete record set due to: " + e.getMessage(), e);
            session.remove(flowFile);
        }
    }
}
Also used: FlowFile(org.apache.nifi.flowfile.FlowFile) HashMap(java.util.HashMap) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.serialization.RecordReader) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) ProcessException(org.apache.nifi.processor.exception.ProcessException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) StandardEvent(org.apache.nifi.processor.util.listen.event.StandardEvent) WriteResult(org.apache.nifi.serialization.WriteResult) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) Record(org.apache.nifi.serialization.record.Record) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) Map(java.util.Map)
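
The example above parses the entire datagram into memory before any writer is created, so a malformed datagram can be routed to parse.failure as a whole. Note that only the InputStream is opened with try-with-resources there; since RecordReader is Closeable, the same loop can manage both resources automatically. A minimal sketch of that variant, reusing names from the example (event, readerFactory, session, getLogger and handleParseFailure are assumed to come from the surrounding processor):

final List<Record> records = new ArrayList<>();
try (final InputStream in = new ByteArrayInputStream(event.getData());
     final RecordReader reader = readerFactory.createRecordReader(Collections.emptyMap(), in, getLogger())) {
    // Drain every record from the datagram before writing anything, so a partial
    // parse never leaves a half-written FlowFile behind.
    Record record;
    while ((record = reader.nextRecord()) != null) {
        records.add(record);
    }
} catch (final Exception e) {
    // Same handling as the example: route the raw event to parse.failure.
    handleParseFailure(event, session, e);
}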

Example 17 with RecordReader

Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.

From class PartitionRecord, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final Map<String, RecordPath> recordPaths;
    try {
        recordPaths = context.getProperties().keySet().stream().filter(prop -> prop.isDynamic()).collect(Collectors.toMap(prop -> prop.getName(), prop -> getRecordPath(context, prop, flowFile)));
    } catch (final Exception e) {
        getLogger().error("Failed to compile RecordPath for {}; routing to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final Map<RecordValueMap, RecordSetWriter> writerMap = new HashMap<>();
    try (final InputStream in = session.read(flowFile)) {
        final Map<String, String> originalAttributes = flowFile.getAttributes();
        final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger());
        final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());
        Record record;
        while ((record = reader.nextRecord()) != null) {
            final Map<String, List<ValueWrapper>> recordMap = new HashMap<>();
            // Evaluate all of the RecordPath's for this Record
            for (final Map.Entry<String, RecordPath> entry : recordPaths.entrySet()) {
                final String propName = entry.getKey();
                final RecordPath recordPath = entry.getValue();
                final Stream<FieldValue> fieldValueStream = recordPath.evaluate(record).getSelectedFields();
                final List<ValueWrapper> fieldValues = fieldValueStream.map(fieldVal -> new ValueWrapper(fieldVal.getValue())).collect(Collectors.toList());
                recordMap.put(propName, fieldValues);
            }
            final RecordValueMap recordValueMap = new RecordValueMap(recordMap);
            // Get the RecordSetWriter that contains the same values for all RecordPaths - or create one if none exists.
            RecordSetWriter writer = writerMap.get(recordValueMap);
            if (writer == null) {
                final FlowFile childFlowFile = session.create(flowFile);
                recordValueMap.setFlowFile(childFlowFile);
                final OutputStream out = session.write(childFlowFile);
                writer = writerFactory.createWriter(getLogger(), writeSchema, out);
                writer.beginRecordSet();
                writerMap.put(recordValueMap, writer);
            }
            writer.write(record);
        }
        // For each RecordSetWriter, finish the record set and close the writer.
        for (final Map.Entry<RecordValueMap, RecordSetWriter> entry : writerMap.entrySet()) {
            final RecordValueMap valueMap = entry.getKey();
            final RecordSetWriter writer = entry.getValue();
            final WriteResult writeResult = writer.finishRecordSet();
            writer.close();
            final Map<String, String> attributes = new HashMap<>();
            attributes.putAll(valueMap.getAttributes());
            attributes.putAll(writeResult.getAttributes());
            attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
            FlowFile childFlowFile = valueMap.getFlowFile();
            childFlowFile = session.putAllAttributes(childFlowFile, attributes);
            session.adjustCounter("Record Processed", writeResult.getRecordCount(), false);
        }
    } catch (final Exception e) {
        for (final Map.Entry<RecordValueMap, RecordSetWriter> entry : writerMap.entrySet()) {
            final RecordValueMap valueMap = entry.getKey();
            final RecordSetWriter writer = entry.getValue();
            try {
                writer.close();
            } catch (final IOException e1) {
                getLogger().warn("Failed to close Record Writer for {}; some resources may not be cleaned up appropriately", new Object[] { flowFile, e1 });
            }
            session.remove(valueMap.getFlowFile());
        }
        getLogger().error("Failed to partition {}", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    // Transfer the child FlowFiles outside of the try/catch so that they can still be removed if a failure occurs.
    for (final RecordValueMap valueMap : writerMap.keySet()) {
        session.transfer(valueMap.getFlowFile(), REL_SUCCESS);
    }
    session.transfer(flowFile, REL_ORIGINAL);
}
Also used: Arrays(java.util.Arrays) CapabilityDescription(org.apache.nifi.annotation.documentation.CapabilityDescription) ValidationContext(org.apache.nifi.components.ValidationContext) HashMap(java.util.HashMap) EventDriven(org.apache.nifi.annotation.behavior.EventDriven) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) ProcessException(org.apache.nifi.processor.exception.ProcessException) RecordPath(org.apache.nifi.record.path.RecordPath) ArrayList(java.util.ArrayList) RecordPathValidator(org.apache.nifi.record.path.validation.RecordPathValidator) HashSet(java.util.HashSet) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) WritesAttributes(org.apache.nifi.annotation.behavior.WritesAttributes) Relationship(org.apache.nifi.processor.Relationship) RecordReader(org.apache.nifi.serialization.RecordReader) Map(java.util.Map) Requirement(org.apache.nifi.annotation.behavior.InputRequirement.Requirement) ValidationResult(org.apache.nifi.components.ValidationResult) Record(org.apache.nifi.serialization.record.Record) OutputStream(java.io.OutputStream) FlowFile(org.apache.nifi.flowfile.FlowFile) Collection(java.util.Collection) WriteResult(org.apache.nifi.serialization.WriteResult) DataTypeUtils(org.apache.nifi.serialization.record.util.DataTypeUtils) ProcessContext(org.apache.nifi.processor.ProcessContext) Set(java.util.Set) IOException(java.io.IOException) ProcessSession(org.apache.nifi.processor.ProcessSession) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) WritesAttribute(org.apache.nifi.annotation.behavior.WritesAttribute) SeeAlso(org.apache.nifi.annotation.documentation.SeeAlso) Collectors(java.util.stream.Collectors) List(java.util.List) InputRequirement(org.apache.nifi.annotation.behavior.InputRequirement) Stream(java.util.stream.Stream) DynamicProperty(org.apache.nifi.annotation.behavior.DynamicProperty) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) SupportsBatching(org.apache.nifi.annotation.behavior.SupportsBatching) RecordPathCache(org.apache.nifi.record.path.util.RecordPathCache) AbstractProcessor(org.apache.nifi.processor.AbstractProcessor) Tags(org.apache.nifi.annotation.documentation.Tags) CoreAttributes(org.apache.nifi.flowfile.attributes.CoreAttributes) FieldValue(org.apache.nifi.record.path.FieldValue) Collections(java.util.Collections) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) InputStream(java.io.InputStream)
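
The core of this example is evaluating a compiled RecordPath against every record and keying the output writers on the resulting values: records that yield the same values share one RecordSetWriter via the RecordValueMap key. A minimal sketch of just the evaluation step, using the RecordPathCache listed in the imports above (the cache size and path string are hypothetical, not taken from the processor):

// Compile (or fetch the cached compilation of) a RecordPath and collect the values
// of the fields it selects from a single Record.
final RecordPathCache recordPathCache = new RecordPathCache(25); // hypothetical cache size
final RecordPath recordPath = recordPathCache.getCompiled("/customer/state"); // hypothetical path
final List<Object> partitionValues = recordPath.evaluate(record)
        .getSelectedFields()
        .map(FieldValue::getValue)
        .collect(Collectors.toList());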

Example 18 with RecordReader

Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.

From class PutParquetTest, method testIOExceptionFromReaderShouldRouteToRetry.

@Test
public void testIOExceptionFromReaderShouldRouteToRetry() throws InitializationException, IOException, MalformedRecordException, SchemaNotFoundException {
    configure(proc, 10);
    final RecordSet recordSet = Mockito.mock(RecordSet.class);
    when(recordSet.next()).thenThrow(new IOException("ERROR"));
    final RecordReader recordReader = Mockito.mock(RecordReader.class);
    when(recordReader.createRecordSet()).thenReturn(recordSet);
    when(recordReader.getSchema()).thenReturn(AvroTypeUtil.createSchema(schema));
    final RecordReaderFactory readerFactory = Mockito.mock(RecordReaderFactory.class);
    when(readerFactory.getIdentifier()).thenReturn("mock-reader-factory");
    when(readerFactory.createRecordReader(any(FlowFile.class), any(InputStream.class), any(ComponentLog.class))).thenReturn(recordReader);
    testRunner.addControllerService("mock-reader-factory", readerFactory);
    testRunner.enableControllerService(readerFactory);
    testRunner.setProperty(PutParquet.RECORD_READER, "mock-reader-factory");
    final String filename = "testIOExceptionFromReaderShouldRouteToRetry-" + System.currentTimeMillis();
    final Map<String, String> flowFileAttributes = new HashMap<>();
    flowFileAttributes.put(CoreAttributes.FILENAME.key(), filename);
    testRunner.enqueue("trigger", flowFileAttributes);
    testRunner.run();
    testRunner.assertAllFlowFilesTransferred(PutParquet.REL_RETRY, 1);
}
Also used: FlowFile(org.apache.nifi.flowfile.FlowFile) MockFlowFile(org.apache.nifi.util.MockFlowFile) HashMap(java.util.HashMap) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.serialization.RecordReader) RecordSet(org.apache.nifi.serialization.record.RecordSet) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) Test(org.junit.Test)

Example 19 with RecordReader

Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.

From class PutParquetTest, method testMalformedRecordExceptionFromReaderShouldRouteToFailure.

@Test
public void testMalformedRecordExceptionFromReaderShouldRouteToFailure() throws InitializationException, IOException, MalformedRecordException, SchemaNotFoundException {
    configure(proc, 10);
    final RecordReader recordReader = Mockito.mock(RecordReader.class);
    when(recordReader.nextRecord()).thenThrow(new MalformedRecordException("ERROR"));
    final RecordReaderFactory readerFactory = Mockito.mock(RecordReaderFactory.class);
    when(readerFactory.getIdentifier()).thenReturn("mock-reader-factory");
    when(readerFactory.createRecordReader(any(FlowFile.class), any(InputStream.class), any(ComponentLog.class))).thenReturn(recordReader);
    testRunner.addControllerService("mock-reader-factory", readerFactory);
    testRunner.enableControllerService(readerFactory);
    testRunner.setProperty(PutParquet.RECORD_READER, "mock-reader-factory");
    final String filename = "testMalformedRecordExceptionShouldRouteToFailure-" + System.currentTimeMillis();
    final Map<String, String> flowFileAttributes = new HashMap<>();
    flowFileAttributes.put(CoreAttributes.FILENAME.key(), filename);
    testRunner.enqueue("trigger", flowFileAttributes);
    testRunner.run();
    testRunner.assertAllFlowFilesTransferred(PutParquet.REL_FAILURE, 1);
}
Also used: FlowFile(org.apache.nifi.flowfile.FlowFile) MockFlowFile(org.apache.nifi.util.MockFlowFile) HashMap(java.util.HashMap) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.serialization.RecordReader) ComponentLog(org.apache.nifi.logging.ComponentLog) MalformedRecordException(org.apache.nifi.serialization.MalformedRecordException) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) Test(org.junit.Test)
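
Both PutParquetTest examples drive the processor through a Mockito-mocked RecordReaderFactory, which keeps the failure paths deterministic. The same wiring can exercise a success path by having the mocked reader hand back one record and then null to end the stream; a sketch under the same assumptions as above (testRunner, schema and the static Mockito imports), with a mocked Record standing in for a real one:

final Record record = Mockito.mock(Record.class);
final RecordReader recordReader = Mockito.mock(RecordReader.class);
when(recordReader.getSchema()).thenReturn(AvroTypeUtil.createSchema(schema));
// First call yields one record, the second signals end-of-stream.
when(recordReader.nextRecord()).thenReturn(record, (Record) null);

final RecordReaderFactory readerFactory = Mockito.mock(RecordReaderFactory.class);
when(readerFactory.getIdentifier()).thenReturn("mock-reader-factory");
when(readerFactory.createRecordReader(any(FlowFile.class), any(InputStream.class), any(ComponentLog.class))).thenReturn(recordReader);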

Example 20 with RecordReader

Use of org.apache.nifi.serialization.RecordReader in project nifi by apache.

From class AbstractKudu, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile flowFile = session.get();
    try {
        if (flowFile == null)
            return;
        final Map<String, String> attributes = new HashMap<String, String>();
        final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
        final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
        final KuduSession kuduSession = this.getKuduSession(kuduClient);
        session.read(flowFile, (final InputStream rawIn) -> {
            RecordReader recordReader = null;
            try (final BufferedInputStream in = new BufferedInputStream(rawIn)) {
                try {
                    recordReader = recordReaderFactory.createRecordReader(flowFile, in, getLogger());
                } catch (Exception ex) {
                    final RecordReaderFactoryException rrfe = new RecordReaderFactoryException("Unable to create RecordReader", ex);
                    exceptionHolder.set(rrfe);
                    return;
                }
                List<String> fieldNames = recordReader.getSchema().getFieldNames();
                final RecordSet recordSet = recordReader.createRecordSet();
                if (skipHeadLine)
                    recordSet.next();
                int numOfAddedRecord = 0;
                Record record = recordSet.next();
                while (record != null) {
                    org.apache.kudu.client.Operation oper = null;
                    if (operationType == OperationType.UPSERT) {
                        oper = upsertRecordToKudu(kuduTable, record, fieldNames);
                    } else {
                        oper = insertRecordToKudu(kuduTable, record, fieldNames);
                    }
                    kuduSession.apply(oper);
                    numOfAddedRecord++;
                    record = recordSet.next();
                }
                getLogger().info("KUDU: number of inserted records: " + numOfAddedRecord);
                attributes.put(RECORD_COUNT_ATTR, String.valueOf(numOfAddedRecord));
            } catch (KuduException ex) {
                getLogger().error("Exception occurred while interacting with Kudu due to " + ex.getMessage(), ex);
                exceptionHolder.set(ex);
            } catch (Exception e) {
                exceptionHolder.set(e);
            } finally {
                IOUtils.closeQuietly(recordReader);
            }
        });
        kuduSession.close();
        if (exceptionHolder.get() != null) {
            throw exceptionHolder.get();
        }
        // Update flow file's attributes after the ingestion
        session.putAllAttributes(flowFile, attributes);
        session.transfer(flowFile, REL_SUCCESS);
        session.getProvenanceReporter().send(flowFile, "Successfully added flowfile to kudu");
    } catch (IOException | FlowFileAccessException e) {
        getLogger().error("Failed to write due to {}", new Object[] { e });
        session.transfer(flowFile, REL_FAILURE);
    } catch (Throwable t) {
        getLogger().error("Failed to write due to {}", new Object[] { t });
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used: KuduSession(org.apache.kudu.client.KuduSession) HashMap(java.util.HashMap) RecordReader(org.apache.nifi.serialization.RecordReader) KuduException(org.apache.kudu.client.KuduException) BufferedInputStream(java.io.BufferedInputStream) RecordReaderFactoryException(org.apache.nifi.processors.hadoop.exception.RecordReaderFactoryException) Record(org.apache.nifi.serialization.record.Record) RecordSet(org.apache.nifi.serialization.record.RecordSet) FlowFile(org.apache.nifi.flowfile.FlowFile) FlowFileAccessException(org.apache.nifi.processor.exception.FlowFileAccessException) InputStream(java.io.InputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) ProcessException(org.apache.nifi.processor.exception.ProcessException) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory)
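
A checked exception thrown inside the session.read() callback cannot propagate out of the lambda directly, which is why the example stashes it in an AtomicReference and rethrows it once the callback returns. A stripped-down sketch of that pattern (the record-handling body is elided; flowFile and session come from the surrounding onTrigger, and wrapping in ProcessException is one possible choice rather than the processor's exact behavior):

final AtomicReference<Throwable> exceptionHolder = new AtomicReference<>(null);
session.read(flowFile, (final InputStream rawIn) -> {
    try (final BufferedInputStream in = new BufferedInputStream(rawIn)) {
        // ... create the RecordReader and apply each record, as in the example above ...
    } catch (final Exception e) {
        // Capture the failure; it is inspected after the callback completes.
        exceptionHolder.set(e);
    }
});
if (exceptionHolder.get() != null) {
    // Surface the captured failure now that the session callback has finished.
    throw new ProcessException(exceptionHolder.get());
}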

Aggregations

RecordReader (org.apache.nifi.serialization.RecordReader): 28 usages
InputStream (java.io.InputStream): 22 usages
FlowFile (org.apache.nifi.flowfile.FlowFile): 22 usages
IOException (java.io.IOException): 21 usages
RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory): 21 usages
ProcessException (org.apache.nifi.processor.exception.ProcessException): 18 usages
RecordSchema (org.apache.nifi.serialization.record.RecordSchema): 17 usages
HashMap (java.util.HashMap): 16 usages
MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException): 16 usages
Record (org.apache.nifi.serialization.record.Record): 15 usages
RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory): 14 usages
SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException): 13 usages
RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter): 12 usages
OutputStream (java.io.OutputStream): 10 usages
Map (java.util.Map): 10 usages
WriteResult (org.apache.nifi.serialization.WriteResult): 8 usages
RecordSet (org.apache.nifi.serialization.record.RecordSet): 8 usages
ArrayList (java.util.ArrayList): 7 usages
BufferedInputStream (java.io.BufferedInputStream): 5 usages
ComponentLog (org.apache.nifi.logging.ComponentLog): 5 usages