Example 71 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

From the SplitContent class, the onTrigger method:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    final boolean keepSequence = context.getProperty(KEEP_SEQUENCE).asBoolean();
    final boolean keepTrailingSequence;
    final boolean keepLeadingSequence;
    if (keepSequence) {
        if (context.getProperty(BYTE_SEQUENCE_LOCATION).getValue().equals(TRAILING_POSITION.getValue())) {
            keepTrailingSequence = true;
            keepLeadingSequence = false;
        } else {
            keepTrailingSequence = false;
            keepLeadingSequence = true;
        }
    } else {
        keepTrailingSequence = false;
        keepLeadingSequence = false;
    }
    final byte[] byteSequence = this.byteSequence.get();
    if (byteSequence == null) {
        // should never happen. But just in case...
        logger.error("{} Unable to obtain Byte Sequence", new Object[] { this });
        session.rollback();
        return;
    }
    final List<Tuple<Long, Long>> splits = new ArrayList<>();
    final NaiveSearchRingBuffer buffer = new NaiveSearchRingBuffer(byteSequence);
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream rawIn) throws IOException {
            long bytesRead = 0L;
            long startOffset = 0L;
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                while (true) {
                    final int nextByte = in.read();
                    if (nextByte == -1) {
                        return;
                    }
                    bytesRead++;
                    boolean matched = buffer.addAndCompare((byte) (nextByte & 0xFF));
                    if (matched) {
                        long splitLength;
                        if (keepTrailingSequence) {
                            splitLength = bytesRead - startOffset;
                        } else {
                            splitLength = bytesRead - startOffset - byteSequence.length;
                        }
                        if (keepLeadingSequence && startOffset > 0) {
                            splitLength += byteSequence.length;
                        }
                        final long splitStart = (keepLeadingSequence && startOffset > 0) ? startOffset - byteSequence.length : startOffset;
                        splits.add(new Tuple<>(splitStart, splitLength));
                        startOffset = bytesRead;
                        buffer.clear();
                    }
                }
            }
        }
    });
    long lastOffsetPlusSize = -1L;
    if (splits.isEmpty()) {
        FlowFile clone = session.clone(flowFile);
        session.transfer(flowFile, REL_ORIGINAL);
        session.transfer(clone, REL_SPLITS);
        logger.info("Found no match for {}; transferring original 'original' and transferring clone {} to 'splits'", new Object[] { flowFile, clone });
        return;
    }
    final ArrayList<FlowFile> splitList = new ArrayList<>();
    for (final Tuple<Long, Long> tuple : splits) {
        long offset = tuple.getKey();
        long size = tuple.getValue();
        if (size > 0) {
            FlowFile split = session.clone(flowFile, offset, size);
            splitList.add(split);
        }
        lastOffsetPlusSize = offset + size;
    }
    // lastOffsetPlusSize indicates the ending position of the last split.
    // if the data didn't end with the byte sequence, we need one final split to run from the end
    // of the last split to the end of the content.
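    // e.g. for content "abc##def" with byte sequence "##" and no kept sequence,
    // the loop records the split (0, 3) for "abc"; lastOffsetPlusSize is then 3,
    // finalSplitOffset becomes 3 + 2 = 5, and the final split (5, 3) yields "def".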
    long finalSplitOffset = lastOffsetPlusSize;
    if (!keepTrailingSequence && !keepLeadingSequence) {
        finalSplitOffset += byteSequence.length;
    }
    if (finalSplitOffset > -1L && finalSplitOffset < flowFile.getSize()) {
        FlowFile finalSplit = session.clone(flowFile, finalSplitOffset, flowFile.getSize() - finalSplitOffset);
        splitList.add(finalSplit);
    }
    final String fragmentId = finishFragmentAttributes(session, flowFile, splitList);
    session.transfer(splitList, REL_SPLITS);
    flowFile = FragmentAttributes.copyAttributesToOriginal(session, flowFile, fragmentId, splitList.size());
    session.transfer(flowFile, REL_ORIGINAL);
    if (splitList.size() > 10) {
        logger.info("Split {} into {} files", new Object[] { flowFile, splitList.size() });
    } else {
        logger.info("Split {} into {} files: {}", new Object[] { flowFile, splitList.size(), splitList });
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream), InputStream (java.io.InputStream), ArrayList (java.util.ArrayList), IOException (java.io.IOException), ComponentLog (org.apache.nifi.logging.ComponentLog), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), NaiveSearchRingBuffer (org.apache.nifi.util.NaiveSearchRingBuffer), Tuple (org.apache.nifi.util.Tuple)
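
InputStreamCallback declares a single process(InputStream) method, so on Java 8+ the anonymous class above can collapse to a lambda. A minimal sketch of the same session.read pattern, here just counting content bytes (the AtomicLong holder and buffer size are illustrative, not from the NiFi source):

final AtomicLong byteCount = new AtomicLong(0L);
session.read(flowFile, in -> {
    final byte[] scratch = new byte[4096];
    int len;
    // drain the content stream; the session closes it after the callback returns
    while ((len = in.read(scratch)) != -1) {
        byteCount.addAndGet(len);
    }
});

Also used: AtomicLong (java.util.concurrent.atomic.AtomicLong)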

Example 72 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

From the ValidateCsv class, the onTrigger method:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final CsvPreference csvPref = getPreference(context, flowFile);
    final boolean header = context.getProperty(HEADER).asBoolean();
    final ComponentLog logger = getLogger();
    final String schema = context.getProperty(SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
    final CellProcessor[] cellProcs = this.parseSchema(schema);
    final boolean isWholeFFValidation = context.getProperty(VALIDATION_STRATEGY).getValue().equals(VALIDATE_WHOLE_FLOWFILE.getValue());
    final AtomicReference<Boolean> valid = new AtomicReference<Boolean>(true);
    final AtomicReference<Boolean> isFirstLineValid = new AtomicReference<Boolean>(true);
    final AtomicReference<Boolean> isFirstLineInvalid = new AtomicReference<Boolean>(true);
    final AtomicReference<Integer> okCount = new AtomicReference<Integer>(0);
    final AtomicReference<Integer> totalCount = new AtomicReference<Integer>(0);
    final AtomicReference<FlowFile> invalidFF = new AtomicReference<FlowFile>(null);
    final AtomicReference<FlowFile> validFF = new AtomicReference<FlowFile>(null);
    if (!isWholeFFValidation) {
        invalidFF.set(session.create(flowFile));
        validFF.set(session.create(flowFile));
    }
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream in) throws IOException {
            NifiCsvListReader listReader = null;
            try {
                listReader = new NifiCsvListReader(new InputStreamReader(in), csvPref);
                // handling of header
                if (header) {
                    List<String> headerList = listReader.read();
                    if (!isWholeFFValidation) {
                        invalidFF.set(session.append(invalidFF.get(), new OutputStreamCallback() {

                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(print(headerList, csvPref, isFirstLineInvalid.get()));
                            }
                        }));
                        validFF.set(session.append(validFF.get(), new OutputStreamCallback() {

                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(print(headerList, csvPref, isFirstLineValid.get()));
                            }
                        }));
                        isFirstLineValid.set(false);
                        isFirstLineInvalid.set(false);
                    }
                }
                boolean stop = false;
                while (!stop) {
                    try {
                        final List<Object> list = listReader.read(cellProcs);
                        stop = list == null;
                        if (!isWholeFFValidation && !stop) {
                            validFF.set(session.append(validFF.get(), new OutputStreamCallback() {

                                @Override
                                public void process(OutputStream out) throws IOException {
                                    out.write(print(list, csvPref, isFirstLineValid.get()));
                                }
                            }));
                            okCount.set(okCount.get() + 1);
                            if (isFirstLineValid.get()) {
                                isFirstLineValid.set(false);
                            }
                        }
                    } catch (final SuperCsvException e) {
                        valid.set(false);
                        if (isWholeFFValidation) {
                            logger.debug("Failed to validate {} against schema due to {}; routing to 'invalid'", new Object[] { flowFile }, e);
                            break;
                        } else {
                            // append the invalid line to the FlowFile that will be routed to the 'invalid' relationship
                            invalidFF.set(session.append(invalidFF.get(), new OutputStreamCallback() {

                                @Override
                                public void process(OutputStream out) throws IOException {
                                    out.write(print(e.getCsvContext().getRowSource(), csvPref, isFirstLineInvalid.get()));
                                }
                            }));
                            if (isFirstLineInvalid.get()) {
                                isFirstLineInvalid.set(false);
                            }
                        }
                    } finally {
                        if (!isWholeFFValidation) {
                            totalCount.set(totalCount.get() + 1);
                        }
                    }
                }
            } catch (final IOException e) {
                valid.set(false);
                logger.error("Failed to validate {} against schema due to {}", new Object[] { flowFile }, e);
            } finally {
                if (listReader != null) {
                    listReader.close();
                }
            }
        }
    });
    if (isWholeFFValidation) {
        if (valid.get()) {
            logger.debug("Successfully validated {} against schema; routing to 'valid'", new Object[] { flowFile });
            session.getProvenanceReporter().route(flowFile, REL_VALID);
            session.transfer(flowFile, REL_VALID);
        } else {
            session.getProvenanceReporter().route(flowFile, REL_INVALID);
            session.transfer(flowFile, REL_INVALID);
        }
    } else {
        if (valid.get()) {
            logger.debug("Successfully validated {} against schema; routing to 'valid'", new Object[] { validFF.get() });
            session.getProvenanceReporter().route(validFF.get(), REL_VALID, "All " + totalCount.get() + " line(s) are valid");
            session.putAttribute(validFF.get(), "count.valid.lines", Integer.toString(totalCount.get()));
            session.putAttribute(validFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
            session.transfer(validFF.get(), REL_VALID);
            session.remove(invalidFF.get());
            session.remove(flowFile);
        } else if (okCount.get() != 0) {
            // the terminating null read also passed through the finally block above,
            // so totalCount is one higher than the number of data lines; correct for it
            totalCount.set(totalCount.get() - 1);
            logger.debug("Successfully validated {}/{} line(s) in {} against schema; routing valid lines to 'valid' and invalid lines to 'invalid'", new Object[] { okCount.get(), totalCount.get(), flowFile });
            session.getProvenanceReporter().route(validFF.get(), REL_VALID, okCount.get() + " valid line(s)");
            session.putAttribute(validFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
            session.putAttribute(validFF.get(), "count.valid.lines", Integer.toString(okCount.get()));
            session.transfer(validFF.get(), REL_VALID);
            session.getProvenanceReporter().route(invalidFF.get(), REL_INVALID, (totalCount.get() - okCount.get()) + " invalid line(s)");
            session.putAttribute(invalidFF.get(), "count.invalid.lines", Integer.toString((totalCount.get() - okCount.get())));
            session.putAttribute(invalidFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
            session.transfer(invalidFF.get(), REL_INVALID);
            session.remove(flowFile);
        } else {
            logger.debug("All lines in {} are invalid; routing to 'invalid'", new Object[] { invalidFF.get() });
            session.getProvenanceReporter().route(invalidFF.get(), REL_INVALID, "All " + totalCount.get() + " line(s) are invalid");
            session.putAttribute(invalidFF.get(), "count.invalid.lines", Integer.toString(totalCount.get()));
            session.putAttribute(invalidFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
            session.transfer(invalidFF.get(), REL_INVALID);
            session.remove(validFF.get());
            session.remove(flowFile);
        }
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), InputStreamReader (java.io.InputStreamReader), InputStream (java.io.InputStream), OutputStream (java.io.OutputStream), AtomicReference (java.util.concurrent.atomic.AtomicReference), IOException (java.io.IOException), ComponentLog (org.apache.nifi.logging.ComponentLog), SuperCsvException (org.supercsv.exception.SuperCsvException), CsvPreference (org.supercsv.prefs.CsvPreference), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), CellProcessor (org.supercsv.cellprocessor.ift.CellProcessor), List (java.util.List), ArrayList (java.util.ArrayList), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback)
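
The per-line routing above works because session.append may be called from inside a read callback, and each append returns a new FlowFile reference, which is why the example threads the children through AtomicReferences. A minimal sketch of that read-and-append pattern with lambdas (names are illustrative, not from the NiFi source):

final AtomicReference<FlowFile> childRef = new AtomicReference<>(session.create(flowFile));
session.read(flowFile, in -> {
    final BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    String line;
    while ((line = reader.readLine()) != null) {
        final byte[] bytes = (line + "\n").getBytes(StandardCharsets.UTF_8);
        // append replaces the FlowFile reference; always keep the latest one
        childRef.set(session.append(childRef.get(), out -> out.write(bytes)));
    }
});

Also used: BufferedReader (java.io.BufferedReader), StandardCharsets (java.nio.charset.StandardCharsets)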

Example 73 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

From the TestJmsConsumer class, the testMap2FlowFileTextMessage method:

@Test
public void testMap2FlowFileTextMessage() throws Exception {
    TestRunner runner = TestRunners.newTestRunner(GetJMSQueue.class);
    TextMessage textMessage = new ActiveMQTextMessage();
    String payload = "Hello world!";
    textMessage.setText(payload);
    ProcessContext context = runner.getProcessContext();
    ProcessSession session = runner.getProcessSessionFactory().createSession();
    ProcessorInitializationContext pic = new MockProcessorInitializationContext(runner.getProcessor(), (MockProcessContext) runner.getProcessContext());
    JmsProcessingSummary summary = JmsConsumer.map2FlowFile(context, session, textMessage, true, pic.getLogger());
    assertEquals("TextMessage content length should equal to FlowFile content size", payload.length(), summary.getLastFlowFile().getSize());
    final byte[] buffer = new byte[payload.length()];
    runner.clearTransferState();
    session.read(summary.getLastFlowFile(), new InputStreamCallback() {

        @Override
        public void process(InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, buffer, false);
        }
    });
    String contentString = new String(buffer, "UTF-8");
    assertEquals("", payload, contentString);
}
Also used: ProcessSession (org.apache.nifi.processor.ProcessSession), TestRunner (org.apache.nifi.util.TestRunner), InputStream (java.io.InputStream), MockProcessorInitializationContext (org.apache.nifi.util.MockProcessorInitializationContext), JmsProcessingSummary (org.apache.nifi.processors.standard.util.JmsProcessingSummary), IOException (java.io.IOException), ProcessContext (org.apache.nifi.processor.ProcessContext), MockProcessContext (org.apache.nifi.util.MockProcessContext), ProcessorInitializationContext (org.apache.nifi.processor.ProcessorInitializationContext), ActiveMQTextMessage (org.apache.activemq.command.ActiveMQTextMessage), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), TextMessage (javax.jms.TextMessage), Test (org.junit.Test)
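
The manual read callback above is one way to inspect content in a test; TestRunner-driven tests more often assert against MockFlowFile directly. A sketch, assuming the processor under test defines a REL_SUCCESS relationship and routed one FlowFile to it:

final MockFlowFile out = runner.getFlowFilesForRelationship(REL_SUCCESS).get(0);
// MockFlowFile keeps its captured content in memory, so it can compare directly
out.assertContentEquals(payload);

Also used: MockFlowFile (org.apache.nifi.util.MockFlowFile)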

Example 74 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

From the PublishAMQP class, the extractMessage method:

/**
 * Extracts the contents of the {@link FlowFile} as a byte array.
 */
private byte[] extractMessage(FlowFile flowFile, ProcessSession session) {
    final byte[] messageContent = new byte[(int) flowFile.getSize()];
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, messageContent, true);
        }
    });
    return messageContent;
}
Also used: InputStream (java.io.InputStream), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), IOException (java.io.IOException)
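
Since the callback only fills a pre-sized buffer, the whole helper reduces to a single lambda on Java 8+. A minimal sketch with unchanged behavior:

private byte[] extractMessage(final FlowFile flowFile, final ProcessSession session) {
    final byte[] messageContent = new byte[(int) flowFile.getSize()];
    // the 'true' flag makes fillBuffer throw if the stream ends before the buffer is full
    session.read(flowFile, in -> StreamUtils.fillBuffer(in, messageContent, true));
    return messageContent;
}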

Example 75 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

From the ExtractAvroMetadata class, the onTrigger method:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final Map<String, String> avroMetadata = new HashMap<>();
    final Set<String> requestedMetadataKeys = new HashSet<>();
    final boolean countRecords = context.getProperty(COUNT_ITEMS).asBoolean();
    final String fingerprintAlgorithm = context.getProperty(FINGERPRINT_ALGORITHM).getValue();
    final String metadataKeysValue = context.getProperty(METADATA_KEYS).getValue();
    if (!StringUtils.isEmpty(metadataKeysValue)) {
        final String[] keys = metadataKeysValue.split("\\s*,\\s*");
        for (final String key : keys) {
            requestedMetadataKeys.add(key.trim());
        }
    }
    try {
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(InputStream rawIn) throws IOException {
                try (final InputStream in = new BufferedInputStream(rawIn);
                    final DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {
                    final Schema schema = reader.getSchema();
                    if (schema == null) {
                        throw new ProcessException("Avro schema was null");
                    }
                    for (String key : reader.getMetaKeys()) {
                        if (requestedMetadataKeys.contains(key)) {
                            avroMetadata.put(key, reader.getMetaString(key));
                        }
                    }
                    try {
                        final byte[] rawFingerprint = SchemaNormalization.parsingFingerprint(fingerprintAlgorithm, schema);
                        avroMetadata.put(SCHEMA_FINGERPRINT_ATTR, Hex.encodeHexString(rawFingerprint));
                        avroMetadata.put(SCHEMA_TYPE_ATTR, schema.getType().getName());
                        avroMetadata.put(SCHEMA_NAME_ATTR, schema.getName());
                    } catch (NoSuchAlgorithmException e) {
                        // shouldn't happen since allowable values are valid algorithms
                        throw new ProcessException(e);
                    }
                    if (countRecords) {
                        long recordCount = reader.getBlockCount();
                        try {
                            while (reader.nextBlock() != null) {
                                recordCount += reader.getBlockCount();
                            }
                        } catch (NoSuchElementException e) {
                        // happens at end of file
                        }
                        avroMetadata.put(ITEM_COUNT_ATTR, String.valueOf(recordCount));
                    }
                }
            }
        });
    } catch (final ProcessException pe) {
        getLogger().error("Failed to extract Avro metadata for {} due to {}; transferring to failure", new Object[] { flowFile, pe });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    flowFile = session.putAllAttributes(flowFile, avroMetadata);
    session.transfer(flowFile, REL_SUCCESS);
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), HashMap (java.util.HashMap), BufferedInputStream (java.io.BufferedInputStream), InputStream (java.io.InputStream), GenericDatumReader (org.apache.avro.generic.GenericDatumReader), Schema (org.apache.avro.Schema), IOException (java.io.IOException), NoSuchAlgorithmException (java.security.NoSuchAlgorithmException), DataFileStream (org.apache.avro.file.DataFileStream), ProcessException (org.apache.nifi.processor.exception.ProcessException), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), NoSuchElementException (java.util.NoSuchElementException), HashSet (java.util.HashSet)
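
The schema fingerprint attribute comes straight from Avro's SchemaNormalization API, which canonicalizes the schema before hashing so that formatting differences in the schema JSON do not change the result. It can be exercised outside NiFi as well; a small sketch with a hard-coded schema (the record definition is illustrative):

final Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"User\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}");
// parsingFingerprint throws NoSuchAlgorithmException for unknown algorithm names
final byte[] fingerprint = SchemaNormalization.parsingFingerprint("SHA-256", schema);
System.out.println(Hex.encodeHexString(fingerprint));

Also used: Hex (org.apache.commons.codec.binary.Hex), SchemaNormalization (org.apache.avro.SchemaNormalization)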

Aggregations

Usage counts across the examples in this collection:

IOException (java.io.IOException): 80
InputStream (java.io.InputStream): 80
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 80
FlowFile (org.apache.nifi.flowfile.FlowFile): 62
ProcessException (org.apache.nifi.processor.exception.ProcessException): 35
ComponentLog (org.apache.nifi.logging.ComponentLog): 27
HashMap (java.util.HashMap): 25
AtomicReference (java.util.concurrent.atomic.AtomicReference): 23
OutputStream (java.io.OutputStream): 19
BufferedInputStream (java.io.BufferedInputStream): 18
ArrayList (java.util.ArrayList): 17
Map (java.util.Map): 17
OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback): 13
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 11
BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream): 10
StopWatch (org.apache.nifi.util.StopWatch): 10
HashSet (java.util.HashSet): 9
Charset (java.nio.charset.Charset): 8
FileInputStream (java.io.FileInputStream): 7
ProcessSession (org.apache.nifi.processor.ProcessSession): 7