Search in sources :

Example 1 with NaiveSearchRingBuffer

Use of org.apache.nifi.util.NaiveSearchRingBuffer in the Apache NiFi project.

From the class SplitContent, method onTrigger.

/**
 * Splits the content of one incoming FlowFile at every occurrence of the configured byte sequence.
 *
 * <p>The content is scanned one byte at a time and an (offset, length) range is recorded for each
 * occurrence of the byte sequence. Depending on the KEEP_SEQUENCE and BYTE_SEQUENCE_LOCATION
 * properties, the byte sequence is dropped, kept at the end of the split that precedes it, or kept
 * at the start of the split that follows it.</p>
 *
 * <p>If the byte sequence never occurs, the original FlowFile goes to REL_ORIGINAL and a clone of it
 * goes to REL_SPLITS. Otherwise every non-empty range is cloned into its own FlowFile, any content
 * remaining after the last range becomes one more split, the splits go to REL_SPLITS and the
 * original (with fragment attributes copied onto it) goes to REL_ORIGINAL.</p>
 *
 * @param context used to read the KEEP_SEQUENCE and BYTE_SEQUENCE_LOCATION properties
 * @param session used to obtain, read, clone and transfer FlowFiles
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final ComponentLog logger = getLogger();

    // The location property is only consulted when the sequence is to be kept at all.
    final boolean keepSequence = context.getProperty(KEEP_SEQUENCE).asBoolean();
    final boolean keepTrailingSequence = keepSequence
            && context.getProperty(BYTE_SEQUENCE_LOCATION).getValue().equals(TRAILING_POSITION.getValue());
    final boolean keepLeadingSequence = keepSequence && !keepTrailingSequence;

    final byte[] byteSequence = this.byteSequence.get();
    if (byteSequence == null) {
        // Not expected to happen; guard against it anyway.
        logger.error("{} Unable to obtain Byte Sequence", new Object[] { this });
        session.rollback();
        return;
    }

    // Each tuple is (offset into the content, number of bytes) for one split.
    final List<Tuple<Long, Long>> splits = new ArrayList<>();
    final NaiveSearchRingBuffer buffer = new NaiveSearchRingBuffer(byteSequence);
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream rawIn) throws IOException {
            // Number of bytes consumed so far.
            long position = 0L;
            // Offset of the first byte following the previous occurrence (0 before any occurrence).
            long segmentStart = 0L;

            try (final InputStream in = new BufferedInputStream(rawIn)) {
                int current;
                while ((current = in.read()) != -1) {
                    position++;
                    if (!buffer.addAndCompare((byte) (current & 0xFF))) {
                        continue;
                    }

                    // The byte just consumed completed an occurrence of the sequence.
                    long length = position - segmentStart;
                    if (!keepTrailingSequence) {
                        // Leave the sequence that was just matched out of this split.
                        length -= byteSequence.length;
                    }

                    long begin = segmentStart;
                    if (keepLeadingSequence && segmentStart > 0) {
                        // Every split but the first reaches back over the sequence that precedes it.
                        length += byteSequence.length;
                        begin -= byteSequence.length;
                    }

                    splits.add(new Tuple<>(begin, length));
                    segmentStart = position;
                    // Clear the search buffer before looking for the next occurrence.
                    buffer.clear();
                }
            }
        }
    });

    if (splits.isEmpty()) {
        final FlowFile clone = session.clone(flowFile);
        session.transfer(flowFile, REL_ORIGINAL);
        session.transfer(clone, REL_SPLITS);
        logger.info("Found no match for {}; transferring original 'original' and transferring clone {} to 'splits'", new Object[] { flowFile, clone });
        return;
    }

    final ArrayList<FlowFile> splitList = new ArrayList<>();
    // End position (offset + size) of the most recently visited range, empty ranges included.
    long lastSplitEnd = -1L;
    for (final Tuple<Long, Long> range : splits) {
        final long offset = range.getKey();
        final long size = range.getValue();
        if (size > 0) {
            splitList.add(session.clone(flowFile, offset, size));
        }
        lastSplitEnd = offset + size;
    }

    // If the content did not end with the byte sequence, one more split is needed that runs from
    // the end of the last range to the end of the content. When the sequence is discarded, that
    // split has to begin after the sequence rather than on it.
    final boolean sequenceDiscarded = !keepTrailingSequence && !keepLeadingSequence;
    final long finalSplitOffset = sequenceDiscarded ? lastSplitEnd + byteSequence.length : lastSplitEnd;
    if (finalSplitOffset > -1L && finalSplitOffset < flowFile.getSize()) {
        final long remaining = flowFile.getSize() - finalSplitOffset;
        splitList.add(session.clone(flowFile, finalSplitOffset, remaining));
    }

    final String fragmentId = finishFragmentAttributes(session, flowFile, splitList);
    session.transfer(splitList, REL_SPLITS);
    flowFile = FragmentAttributes.copyAttributesToOriginal(session, flowFile, fragmentId, splitList.size());
    session.transfer(flowFile, REL_ORIGINAL);

    // Only list the individual splits in the log when there are few of them.
    if (splitList.size() <= 10) {
        logger.info("Split {} into {} files: {}", new Object[] { flowFile, splitList.size(), splitList });
    } else {
        logger.info("Split {} into {} files", new Object[] { flowFile, splitList.size() });
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) NaiveSearchRingBuffer(org.apache.nifi.util.NaiveSearchRingBuffer) Tuple(org.apache.nifi.util.Tuple)

Aggregations

IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 ArrayList (java.util.ArrayList)1 FlowFile (org.apache.nifi.flowfile.FlowFile)1 ComponentLog (org.apache.nifi.logging.ComponentLog)1 InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback)1 BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream)1 NaiveSearchRingBuffer (org.apache.nifi.util.NaiveSearchRingBuffer)1 Tuple (org.apache.nifi.util.Tuple)1