Example 1 with WriteResult

Use of org.apache.nifi.serialization.WriteResult in the apache/nifi project.

From the class ConsumerLease, method processBundle:

private boolean processBundle(final BundleTracker bundle) throws IOException {
    final RecordSetWriter writer = bundle.recordWriter;
    if (writer != null) {
        final WriteResult writeResult;
        try {
            writeResult = writer.finishRecordSet();
        } finally {
            writer.close();
        }
        // If nothing was written, drop the empty FlowFile instead of transferring it.
        if (writeResult.getRecordCount() == 0) {
            getProcessSession().remove(bundle.flowFile);
            return false;
        }
        // Carry the writer's attributes onto the FlowFile, along with the writer's MIME type.
        final Map<String, String> attributes = new HashMap<>();
        attributes.putAll(writeResult.getAttributes());
        attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
        bundle.flowFile = getProcessSession().putAllAttributes(bundle.flowFile, attributes);
    }
    populateAttributes(bundle);
    return true;
}
Also used: WriteResult (org.apache.nifi.serialization.WriteResult), HashMap (java.util.HashMap), RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter)
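
The examples on this page share one lifecycle: begin a record set, write records, call finishRecordSet(), and copy the returned WriteResult's record count and attributes onto the FlowFile. The sketch below condenses that pattern into a single helper; the method name writeRecords and its parameter list are illustrative rather than taken from any NiFi class, but every API call (beginRecordSet, write, finishRecordSet, getRecordCount, getAttributes, getMimeType, putAllAttributes) appears in the examples shown here.

private FlowFile writeRecords(final ProcessSession session, final FlowFile flowFile,
        final RecordSetWriterFactory writerFactory, final RecordSchema schema,
        final List<Record> records, final ComponentLog logger) throws IOException, SchemaNotFoundException {
    // Hypothetical helper: the session, records, schema and factory are assumed to be available.
    final Map<String, String> attributes = new HashMap<>();
    try (final OutputStream out = session.write(flowFile);
        final RecordSetWriter writer = writerFactory.createWriter(logger, schema, out)) {
        writer.beginRecordSet();
        for (final Record record : records) {
            writer.write(record);
        }
        // finishRecordSet() returns the WriteResult: the number of records written plus any
        // attributes the writer wants carried onto the FlowFile (e.g. schema references).
        final WriteResult writeResult = writer.finishRecordSet();
        attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
        attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
        attributes.putAll(writeResult.getAttributes());
    }
    return session.putAllAttributes(flowFile, attributes);
}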

Example 2 with WriteResult

Use of org.apache.nifi.serialization.WriteResult in the apache/nifi project.

From the class SplitRecord, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile original = session.get();
    if (original == null) {
        return;
    }
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final int maxRecords = context.getProperty(RECORDS_PER_SPLIT).evaluateAttributeExpressions(original).asInteger();
    final List<FlowFile> splits = new ArrayList<>();
    final Map<String, String> originalAttributes = original.getAttributes();
    try {
        session.read(original, new InputStreamCallback() {

            @Override
            public void process(final InputStream in) throws IOException {
                try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
                    final RecordSchema schema = writerFactory.getSchema(originalAttributes, reader.getSchema());
                    final RecordSet recordSet = reader.createRecordSet();
                    final PushBackRecordSet pushbackSet = new PushBackRecordSet(recordSet);
                    // Create one split FlowFile per iteration, each holding up to maxRecords records.
                    while (pushbackSet.isAnotherRecord()) {
                        FlowFile split = session.create(original);
                        try {
                            final Map<String, String> attributes = new HashMap<>();
                            final WriteResult writeResult;
                            try (final OutputStream out = session.write(split);
                                final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out)) {
                                if (maxRecords == 1) {
                                    final Record record = pushbackSet.next();
                                    writeResult = writer.write(record);
                                } else {
                                    final RecordSet limitedSet = pushbackSet.limit(maxRecords);
                                    writeResult = writer.write(limitedSet);
                                }
                                attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
                                attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
                                attributes.putAll(writeResult.getAttributes());
                                session.adjustCounter("Records Split", writeResult.getRecordCount(), false);
                            }
                            split = session.putAllAttributes(split, attributes);
                        } finally {
                            splits.add(split);
                        }
                    }
                } catch (final SchemaNotFoundException | MalformedRecordException e) {
                    throw new ProcessException("Failed to parse incoming data", e);
                }
            }
        });
    } catch (final ProcessException pe) {
        getLogger().error("Failed to split {}", new Object[] { original, pe });
        session.remove(splits);
        session.transfer(original, REL_FAILURE);
        return;
    }
    session.transfer(original, REL_ORIGINAL);
    session.transfer(splits, REL_SPLITS);
    getLogger().info("Successfully split {} into {} FlowFiles, each containing up to {} records", new Object[] { original, splits.size(), maxRecords });
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), InputStream (java.io.InputStream), RecordReader (org.apache.nifi.serialization.RecordReader), OutputStream (java.io.OutputStream), ArrayList (java.util.ArrayList), IOException (java.io.IOException), RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), ProcessException (org.apache.nifi.processor.exception.ProcessException), WriteResult (org.apache.nifi.serialization.WriteResult), RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), Record (org.apache.nifi.serialization.record.Record), PushBackRecordSet (org.apache.nifi.serialization.record.PushBackRecordSet), RecordSet (org.apache.nifi.serialization.record.RecordSet), RecordSchema (org.apache.nifi.serialization.record.RecordSchema), HashMap (java.util.HashMap), Map (java.util.Map)

Example 3 with WriteResult

Use of org.apache.nifi.serialization.WriteResult in the apache/nifi project.

From the class RecordBin, method complete:

public void complete(final String completionReason) throws IOException {
    writeLock.lock();
    try {
        if (isComplete()) {
            logger.debug("Cannot complete {} because it is already completed", new Object[] { this });
            return;
        }
        complete = true;
        logger.debug("Marked {} as complete because complete() was called", new Object[] { this });
        // Finish the record set before closing so the WriteResult reflects everything written to this bin.
        final WriteResult writeResult = recordWriter.finishRecordSet();
        recordWriter.close();
        logger.debug("Closed Record Writer using session {} for {}", new Object[] { session, this });
        if (flowFiles.isEmpty()) {
            session.remove(merged);
            return;
        }
        // If using defragment mode, and we don't have enough FlowFiles, then we need to fail this bin.
        final Optional<String> countAttr = thresholds.getRecordCountAttribute();
        if (countAttr.isPresent()) {
            // Ensure that at least one FlowFile has a fragment.count attribute and that they all have the same value, if they have a value.
            Integer expectedBinCount = null;
            for (final FlowFile flowFile : flowFiles) {
                final String countVal = flowFile.getAttribute(countAttr.get());
                if (countVal == null) {
                    continue;
                }
                final int count;
                try {
                    count = Integer.parseInt(countVal);
                } catch (final NumberFormatException nfe) {
                    logger.error("Could not merge bin with {} FlowFiles because the '{}' attribute had a value of '{}' for {} but expected a number", new Object[] { flowFiles.size(), countAttr.get(), countVal, flowFile });
                    fail();
                    return;
                }
                if (expectedBinCount != null && count != expectedBinCount) {
                    logger.error("Could not merge bin with {} FlowFiles because the '{}' attribute had a value of '{}' for {} but another FlowFile in the bin had a value of {}", new Object[] { flowFiles.size(), countAttr.get(), countVal, flowFile, expectedBinCount });
                    fail();
                    return;
                }
                expectedBinCount = count;
            }
            if (expectedBinCount == null) {
                logger.error("Could not merge bin with {} FlowFiles because the '{}' attribute was not present on any of the FlowFiles", new Object[] { flowFiles.size(), countAttr.get() });
                fail();
                return;
            }
            if (expectedBinCount != flowFiles.size()) {
                logger.error("Could not merge bin with {} FlowFiles because the '{}' attribute had a value of '{}' but only {} of {} FlowFiles were encountered before this bin was evicted " + "(due to to Max Bin Age being reached or due to the Maximum Number of Bins being exceeded).", new Object[] { flowFiles.size(), countAttr.get(), expectedBinCount, flowFiles.size(), expectedBinCount });
                fail();
                return;
            }
        }
        final Map<String, String> attributes = new HashMap<>();
        final AttributeStrategy attributeStrategy = AttributeStrategyUtil.strategyFor(context);
        final Map<String, String> mergedAttributes = attributeStrategy.getMergedAttributes(flowFiles);
        attributes.putAll(mergedAttributes);
        attributes.putAll(writeResult.getAttributes());
        attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
        attributes.put(CoreAttributes.MIME_TYPE.key(), recordWriter.getMimeType());
        attributes.put(MERGE_COUNT_ATTRIBUTE, Integer.toString(flowFiles.size()));
        attributes.put(MERGE_BIN_AGE_ATTRIBUTE, Long.toString(getBinAge()));
        merged = session.putAllAttributes(merged, attributes);
        session.getProvenanceReporter().join(flowFiles, merged, "Records Merged due to: " + completionReason);
        session.transfer(merged, MergeRecord.REL_MERGED);
        session.transfer(flowFiles, MergeRecord.REL_ORIGINAL);
        session.adjustCounter("Records Merged", writeResult.getRecordCount(), false);
        session.commit();
        if (logger.isDebugEnabled()) {
            final List<String> ids = flowFiles.stream().map(ff -> "id=" + ff.getId()).collect(Collectors.toList());
            logger.debug("Completed bin {} with {} records with Merged FlowFile {} using input FlowFiles {}", new Object[] { this, writeResult.getRecordCount(), merged, ids });
        }
    } catch (final Exception e) {
        session.rollback(true);
        throw e;
    } finally {
        writeLock.unlock();
    }
}
Also used: HashMap (java.util.HashMap), ReentrantReadWriteLock (java.util.concurrent.locks.ReentrantReadWriteLock), SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException), ComponentLog (org.apache.nifi.logging.ComponentLog), ArrayList (java.util.ArrayList), ByteCountingOutputStream (org.apache.nifi.stream.io.ByteCountingOutputStream), RecordReader (org.apache.nifi.serialization.RecordReader), Map (java.util.Map), ReadWriteLock (java.util.concurrent.locks.ReadWriteLock), Record (org.apache.nifi.serialization.record.Record), OutputStream (java.io.OutputStream), MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException), FlowFile (org.apache.nifi.flowfile.FlowFile), MergeRecord (org.apache.nifi.processors.standard.MergeRecord), WriteResult (org.apache.nifi.serialization.WriteResult), ProcessContext (org.apache.nifi.processor.ProcessContext), IOException (java.io.IOException), ProcessSession (org.apache.nifi.processor.ProcessSession), RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory), Collectors (java.util.stream.Collectors), TimeUnit (java.util.concurrent.TimeUnit), AtomicLong (java.util.concurrent.atomic.AtomicLong), List (java.util.List), Lock (java.util.concurrent.locks.Lock), Optional (java.util.Optional), CoreAttributes (org.apache.nifi.flowfile.attributes.CoreAttributes), Collections (java.util.Collections), RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter)

Example 4 with WriteResult

Use of org.apache.nifi.serialization.WriteResult in the apache/nifi project.

From the class AbstractRecordProcessor, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final Map<String, String> attributes = new HashMap<>();
    final AtomicInteger recordCount = new AtomicInteger();
    final FlowFile original = flowFile;
    final Map<String, String> originalAttributes = flowFile.getAttributes();
    try {
        flowFile = session.write(flowFile, new StreamCallback() {

            @Override
            public void process(final InputStream in, final OutputStream out) throws IOException {
                try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
                    final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());
                    try (final RecordSetWriter writer = writerFactory.createWriter(getLogger(), writeSchema, out)) {
                        writer.beginRecordSet();
                        Record record;
                        while ((record = reader.nextRecord()) != null) {
                            final Record processed = AbstractRecordProcessor.this.process(record, writeSchema, original, context);
                            writer.write(processed);
                        }
                        final WriteResult writeResult = writer.finishRecordSet();
                        attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
                        attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
                        attributes.putAll(writeResult.getAttributes());
                        recordCount.set(writeResult.getRecordCount());
                    }
                } catch (final SchemaNotFoundException e) {
                    throw new ProcessException(e.getLocalizedMessage(), e);
                } catch (final MalformedRecordException e) {
                    throw new ProcessException("Could not parse incoming data", e);
                }
            }
        });
    } catch (final Exception e) {
        getLogger().error("Failed to process {}; will route to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    flowFile = session.putAllAttributes(flowFile, attributes);
    session.transfer(flowFile, REL_SUCCESS);
    final int count = recordCount.get();
    session.adjustCounter("Records Processed", count, false);
    getLogger().info("Successfully converted {} records for {}", new Object[] { count, flowFile });
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), HashMap (java.util.HashMap), InputStream (java.io.InputStream), OutputStream (java.io.OutputStream), RecordReader (org.apache.nifi.serialization.RecordReader), RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter), StreamCallback (org.apache.nifi.processor.io.StreamCallback), SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException), ProcessException (org.apache.nifi.processor.exception.ProcessException), MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException), IOException (java.io.IOException), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), WriteResult (org.apache.nifi.serialization.WriteResult), RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), Record (org.apache.nifi.serialization.record.Record), RecordSchema (org.apache.nifi.serialization.record.RecordSchema)

Example 5 with WriteResult

Use of org.apache.nifi.serialization.WriteResult in the apache/nifi project.

From the class AbstractRouteRecord, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final T flowFileContext;
    try {
        flowFileContext = getFlowFileContext(flowFile, context);
    } catch (final Exception e) {
        getLogger().error("Failed to process {}; routing to failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final AtomicInteger numRecords = new AtomicInteger(0);
    final Map<Relationship, Tuple<FlowFile, RecordSetWriter>> writers = new HashMap<>();
    final FlowFile original = flowFile;
    final Map<String, String> originalAttributes = original.getAttributes();
    try {
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(final InputStream in) throws IOException {
                try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
                    final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema());
                    Record record;
                    while ((record = reader.nextRecord()) != null) {
                        final Set<Relationship> relationships = route(record, writeSchema, original, context, flowFileContext);
                        numRecords.incrementAndGet();
                        for (final Relationship relationship : relationships) {
                            final RecordSetWriter recordSetWriter;
                            Tuple<FlowFile, RecordSetWriter> tuple = writers.get(relationship);
                            if (tuple == null) {
                                FlowFile outFlowFile = session.create(original);
                                final OutputStream out = session.write(outFlowFile);
                                recordSetWriter = writerFactory.createWriter(getLogger(), writeSchema, out);
                                recordSetWriter.beginRecordSet();
                                tuple = new Tuple<>(outFlowFile, recordSetWriter);
                                writers.put(relationship, tuple);
                            } else {
                                recordSetWriter = tuple.getValue();
                            }
                            recordSetWriter.write(record);
                        }
                    }
                } catch (final SchemaNotFoundException | MalformedRecordException e) {
                    throw new ProcessException("Could not parse incoming data", e);
                }
            }
        });
        for (final Map.Entry<Relationship, Tuple<FlowFile, RecordSetWriter>> entry : writers.entrySet()) {
            final Relationship relationship = entry.getKey();
            final Tuple<FlowFile, RecordSetWriter> tuple = entry.getValue();
            final RecordSetWriter writer = tuple.getValue();
            FlowFile childFlowFile = tuple.getKey();
            final WriteResult writeResult = writer.finishRecordSet();
            try {
                writer.close();
            } catch (final IOException ioe) {
                getLogger().warn("Failed to close Writer for {}", new Object[] { childFlowFile });
            }
            final Map<String, String> attributes = new HashMap<>();
            attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
            attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
            attributes.putAll(writeResult.getAttributes());
            childFlowFile = session.putAllAttributes(childFlowFile, attributes);
            session.transfer(childFlowFile, relationship);
            session.adjustCounter("Records Processed", writeResult.getRecordCount(), false);
            session.adjustCounter("Records Routed to " + relationship.getName(), writeResult.getRecordCount(), false);
            session.getProvenanceReporter().route(childFlowFile, relationship);
        }
    } catch (final Exception e) {
        getLogger().error("Failed to process {}", new Object[] { flowFile, e });
        for (final Tuple<FlowFile, RecordSetWriter> tuple : writers.values()) {
            try {
                tuple.getValue().close();
            } catch (final Exception e1) {
                getLogger().warn("Failed to close Writer for {}; some resources may not be cleaned up appropriately", new Object[] { tuple.getKey() });
            }
            session.remove(tuple.getKey());
        }
        session.transfer(flowFile, REL_FAILURE);
        return;
    } finally {
        for (final Tuple<FlowFile, RecordSetWriter> tuple : writers.values()) {
            final RecordSetWriter writer = tuple.getValue();
            try {
                writer.close();
            } catch (final Exception e) {
                getLogger().warn("Failed to close Record Writer for {}; some resources may not be properly cleaned up", new Object[] { tuple.getKey(), e });
            }
        }
    }
    if (isRouteOriginal()) {
        flowFile = session.putAttribute(flowFile, "record.count", String.valueOf(numRecords));
        session.transfer(flowFile, REL_ORIGINAL);
    } else {
        session.remove(flowFile);
    }
    getLogger().info("Successfully processed {}, creating {} derivative FlowFiles and processing {} records", new Object[] { flowFile, writers.size(), numRecords });
}
Also used: HashSet (java.util.HashSet), Set (java.util.Set), HashMap (java.util.HashMap), RecordReader (org.apache.nifi.serialization.RecordReader), OutputStream (java.io.OutputStream), RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter), RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory), Record (org.apache.nifi.serialization.record.Record), RecordSchema (org.apache.nifi.serialization.record.RecordSchema), FlowFile (org.apache.nifi.flowfile.FlowFile), InputStream (java.io.InputStream), IOException (java.io.IOException), SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException), ProcessException (org.apache.nifi.processor.exception.ProcessException), MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), WriteResult (org.apache.nifi.serialization.WriteResult), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), Relationship (org.apache.nifi.processor.Relationship), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), Map (java.util.Map), Tuple (org.apache.nifi.util.Tuple)

Aggregations

WriteResult (org.apache.nifi.serialization.WriteResult): 12
HashMap (java.util.HashMap): 11
RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter): 11
IOException (java.io.IOException): 9
Record (org.apache.nifi.serialization.record.Record): 9
OutputStream (java.io.OutputStream): 8
FlowFile (org.apache.nifi.flowfile.FlowFile): 8
RecordReader (org.apache.nifi.serialization.RecordReader): 8
RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory): 8
RecordSchema (org.apache.nifi.serialization.record.RecordSchema): 8
InputStream (java.io.InputStream): 7
ProcessException (org.apache.nifi.processor.exception.ProcessException): 7
Map (java.util.Map): 6
RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory): 6
ArrayList (java.util.ArrayList): 5
SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException): 5
HashSet (java.util.HashSet): 3
Relationship (org.apache.nifi.processor.Relationship): 3
MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException): 3
ByteArrayInputStream (java.io.ByteArrayInputStream): 2