Use of org.apache.nifi.util.NaiveSearchRingBuffer in the Apache NiFi project (nifi by apache).
The example is the onTrigger method of the SplitContent processor class.
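Before the processor code, a minimal sketch of the search pattern it relies on: the buffer is constructed with the byte sequence to find, addAndCompare(byte) returns true as soon as the most recently added bytes equal that sequence, and clear() resets the match state. The class name RingBufferDemo, the delimiter, and the sample bytes are made up for illustration; only the NaiveSearchRingBuffer calls that appear in the snippet below are used, and nifi-utils is assumed to be on the classpath.

import org.apache.nifi.util.NaiveSearchRingBuffer;

public class RingBufferDemo {
    public static void main(String[] args) {
        byte[] delimiter = { (byte) '\n' };                  // byte sequence to search for (illustrative)
        byte[] data = "line one\nline two\n".getBytes();     // made-up sample content
        NaiveSearchRingBuffer search = new NaiveSearchRingBuffer(delimiter);
        for (int i = 0; i < data.length; i++) {
            if (search.addAndCompare(data[i])) {
                // true when the bytes ending at index i equal the delimiter
                System.out.println("Delimiter found, ending at byte index " + i);
                search.clear();                              // reset before looking for the next occurrence
            }
        }
    }
}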
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final ComponentLog logger = getLogger();
    final boolean keepSequence = context.getProperty(KEEP_SEQUENCE).asBoolean();
    final boolean keepTrailingSequence;
    final boolean keepLeadingSequence;
    if (keepSequence) {
        if (context.getProperty(BYTE_SEQUENCE_LOCATION).getValue().equals(TRAILING_POSITION.getValue())) {
            keepTrailingSequence = true;
            keepLeadingSequence = false;
        } else {
            keepTrailingSequence = false;
            keepLeadingSequence = true;
        }
    } else {
        keepTrailingSequence = false;
        keepLeadingSequence = false;
    }
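    // When KEEP_SEQUENCE is set, the matched byte sequence is kept with the splits:
    // appended to the end of the preceding split (trailing) or prepended to the
    // start of the following split (leading).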
    final byte[] byteSequence = this.byteSequence.get();
    if (byteSequence == null) {
        // should never happen. But just in case...
        logger.error("{} Unable to obtain Byte Sequence", new Object[] { this });
        session.rollback();
        return;
    }

    final List<Tuple<Long, Long>> splits = new ArrayList<>();
    final NaiveSearchRingBuffer buffer = new NaiveSearchRingBuffer(byteSequence);
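    // Scan the content one byte at a time; the ring buffer reports a match whenever
    // the most recently read bytes equal the configured byte sequence.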
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream rawIn) throws IOException {
            long bytesRead = 0L;
            long startOffset = 0L;
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                while (true) {
                    final int nextByte = in.read();
                    if (nextByte == -1) {
                        return;
                    }
                    bytesRead++;
                    boolean matched = buffer.addAndCompare((byte) (nextByte & 0xFF));
                    if (matched) {
                        long splitLength;
                        if (keepTrailingSequence) {
                            splitLength = bytesRead - startOffset;
                        } else {
                            splitLength = bytesRead - startOffset - byteSequence.length;
                        }
                        if (keepLeadingSequence && startOffset > 0) {
                            splitLength += byteSequence.length;
                        }
                        final long splitStart = (keepLeadingSequence && startOffset > 0) ? startOffset - byteSequence.length : startOffset;
                        splits.add(new Tuple<>(splitStart, splitLength));
                        startOffset = bytesRead;
                        buffer.clear();
                    }
                }
            }
        }
    });
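    // At this point 'splits' holds an (offset, length) tuple for every occurrence of the byte sequence found in the stream.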
    long lastOffsetPlusSize = -1L;
    if (splits.isEmpty()) {
        FlowFile clone = session.clone(flowFile);
        session.transfer(flowFile, REL_ORIGINAL);
        session.transfer(clone, REL_SPLITS);
        logger.info("Found no match for {}; transferring original 'original' and transferring clone {} to 'splits'", new Object[] { flowFile, clone });
        return;
    }

    final ArrayList<FlowFile> splitList = new ArrayList<>();
    for (final Tuple<Long, Long> tuple : splits) {
        long offset = tuple.getKey();
        long size = tuple.getValue();
        if (size > 0) {
            FlowFile split = session.clone(flowFile, offset, size);
            splitList.add(split);
        }
        lastOffsetPlusSize = offset + size;
    }

    // lastOffsetPlusSize indicates the ending position of the last split.
    // if the data didn't end with the byte sequence, we need one final split to run from the end
    // of the last split to the end of the content.
    long finalSplitOffset = lastOffsetPlusSize;
    if (!keepTrailingSequence && !keepLeadingSequence) {
        finalSplitOffset += byteSequence.length;
    }
    if (finalSplitOffset > -1L && finalSplitOffset < flowFile.getSize()) {
        FlowFile finalSplit = session.clone(flowFile, finalSplitOffset, flowFile.getSize() - finalSplitOffset);
        splitList.add(finalSplit);
    }

    final String fragmentId = finishFragmentAttributes(session, flowFile, splitList);
    session.transfer(splitList, REL_SPLITS);
    flowFile = FragmentAttributes.copyAttributesToOriginal(session, flowFile, fragmentId, splitList.size());
    session.transfer(flowFile, REL_ORIGINAL);

    if (splitList.size() > 10) {
        logger.info("Split {} into {} files", new Object[] { flowFile, splitList.size() });
    } else {
        logger.info("Split {} into {} files: {}", new Object[] { flowFile, splitList.size(), splitList });
    }
}
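To make the offset arithmetic above easier to follow, here is a minimal, self-contained sketch that applies the same read loop to an in-memory byte array instead of a FlowFile, with both keep-sequence flags effectively false. The class name SplitOffsetsDemo, the delimiter, and the sample content are made up for illustration; only the NaiveSearchRingBuffer calls that appear in the snippet (the constructor, addAndCompare, and clear) are used, and nifi-utils is assumed to be on the classpath.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.nifi.util.NaiveSearchRingBuffer;

public class SplitOffsetsDemo {
    public static void main(String[] args) throws IOException {
        final byte[] byteSequence = { (byte) '|' };          // delimiter (illustrative)
        final byte[] content = "abc|defg|hi".getBytes();     // made-up sample content
        final NaiveSearchRingBuffer buffer = new NaiveSearchRingBuffer(byteSequence);
        final List<long[]> splits = new ArrayList<>();       // {offset, length} pairs, delimiter excluded

        long bytesRead = 0L;
        long startOffset = 0L;
        try (InputStream in = new ByteArrayInputStream(content)) {
            int nextByte;
            while ((nextByte = in.read()) != -1) {
                bytesRead++;
                if (buffer.addAndCompare((byte) (nextByte & 0xFF))) {
                    // same math as the processor when both keep-sequence flags are false
                    splits.add(new long[] { startOffset, bytesRead - startOffset - byteSequence.length });
                    startOffset = bytesRead;
                    buffer.clear();
                }
            }
        }

        // trailing data after the last delimiter becomes one final split
        if (startOffset < content.length) {
            splits.add(new long[] { startOffset, content.length - startOffset });
        }

        for (long[] split : splits) {
            System.out.println("offset=" + split[0] + " length=" + split[1]);
        }
    }
}

For the sample content "abc|defg|hi" this prints offsets and lengths (0,3), (4,4) and a final (9,2): the delimiter is excluded from each split, and the trailing data after the last match becomes one final split, mirroring the finalSplitOffset handling above.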