Search in sources :

Example 1 with PushBackRecordSet

Use of org.apache.nifi.serialization.record.PushBackRecordSet in the Apache NiFi project.

The onTrigger method of the SplitRecord class.

/**
 * Splits the records of a single incoming FlowFile into child FlowFiles.
 *
 * <p>One FlowFile is pulled from the session; when none is available the method returns
 * immediately. The content is parsed with the configured {@code RECORD_READER} and, while
 * records remain, a new child FlowFile is created from the original and filled by the
 * configured {@code RECORD_WRITER}, using the {@code RECORDS_PER_SPLIT} value (evaluated
 * against the incoming FlowFile's attributes) as the per-child limit. Each child receives a
 * {@code record.count} attribute, the writer's MIME type and any attributes reported by the
 * writer, and the "Records Split" counter is adjusted by the number of records written.</p>
 *
 * <p>On success the original is transferred to {@code REL_ORIGINAL} and the children to
 * {@code REL_SPLITS}. When a {@link ProcessException} escapes the read, every child created
 * so far is removed and the original is transferred to {@code REL_FAILURE}.</p>
 *
 * @param context supplies the reader/writer factories and the records-per-split property
 * @param session the session used to read the original and to create, write and route children
 * @throws ProcessException declared by the processor contract; ProcessExceptions raised while
 *         reading the original are handled here by routing it to {@code REL_FAILURE}
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory recordWriterFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final int recordsPerSplit = context.getProperty(RECORDS_PER_SPLIT).evaluateAttributeExpressions(original).asInteger();

    final Map<String, String> inputAttributes = original.getAttributes();
    final List<FlowFile> children = new ArrayList<>();

    try {
        session.read(original, rawIn -> {
            try (final RecordReader recordReader = recordReaderFactory.createRecordReader(inputAttributes, rawIn, getLogger())) {
                final RecordSchema writeSchema = recordWriterFactory.getSchema(inputAttributes, recordReader.getSchema());
                final PushBackRecordSet remaining = new PushBackRecordSet(recordReader.createRecordSet());

                while (remaining.isAnotherRecord()) {
                    FlowFile child = session.create(original);
                    try {
                        final Map<String, String> childAttributes = new HashMap<>();

                        try (final OutputStream childOut = session.write(child);
                            final RecordSetWriter recordWriter = recordWriterFactory.createWriter(getLogger(), writeSchema, childOut)) {

                            // A limit of one is written through the single-record overload;
                            // anything else goes through a limited view of the remaining records.
                            final WriteResult result = (recordsPerSplit == 1)
                                ? recordWriter.write(remaining.next())
                                : recordWriter.write(remaining.limit(recordsPerSplit));

                            final int writtenCount = result.getRecordCount();
                            childAttributes.put("record.count", String.valueOf(writtenCount));
                            childAttributes.put(CoreAttributes.MIME_TYPE.key(), recordWriter.getMimeType());
                            // Writer-supplied attributes go in last, so they win on key collisions.
                            childAttributes.putAll(result.getAttributes());
                            session.adjustCounter("Records Split", writtenCount, false);
                        }

                        child = session.putAllAttributes(child, childAttributes);
                    } finally {
                        // Track the child even when writing failed, so the failure path
                        // below is able to remove it from the session.
                        children.add(child);
                    }
                }
            } catch (final SchemaNotFoundException | MalformedRecordException e) {
                throw new ProcessException("Failed to parse incoming data", e);
            }
        });
    } catch (final ProcessException pe) {
        getLogger().error("Failed to split {}", new Object[] { original, pe });
        session.remove(children);
        session.transfer(original, REL_FAILURE);
        return;
    }

    session.transfer(original, REL_ORIGINAL);
    session.transfer(children, REL_SPLITS);
    getLogger().info("Successfully split {} into {} FlowFiles, each containing up to {} records", new Object[] { original, children.size(), recordsPerSplit });
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.serialization.RecordReader) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) IOException(java.io.IOException) RecordSetWriter(org.apache.nifi.serialization.RecordSetWriter) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) ProcessException(org.apache.nifi.processor.exception.ProcessException) WriteResult(org.apache.nifi.serialization.WriteResult) RecordSetWriterFactory(org.apache.nifi.serialization.RecordSetWriterFactory) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) Record(org.apache.nifi.serialization.record.Record) PushBackRecordSet(org.apache.nifi.serialization.record.PushBackRecordSet) RecordSet(org.apache.nifi.serialization.record.RecordSet) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) HashMap(java.util.HashMap) Map(java.util.Map) PushBackRecordSet(org.apache.nifi.serialization.record.PushBackRecordSet)

Aggregations

IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 OutputStream (java.io.OutputStream)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 FlowFile (org.apache.nifi.flowfile.FlowFile)1 ProcessException (org.apache.nifi.processor.exception.ProcessException)1 InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback)1 RecordReader (org.apache.nifi.serialization.RecordReader)1 RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory)1 RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter)1 RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory)1 WriteResult (org.apache.nifi.serialization.WriteResult)1 PushBackRecordSet (org.apache.nifi.serialization.record.PushBackRecordSet)1 Record (org.apache.nifi.serialization.record.Record)1 RecordSchema (org.apache.nifi.serialization.record.RecordSchema)1 RecordSet (org.apache.nifi.serialization.record.RecordSet)1