Use of org.apache.nifi.stream.io.util.TextLineDemarcator in the Apache NiFi project.
The onTrigger method of the SplitText class.
/**
 * Splits the content of a single incoming FlowFile, releasing all splits as FlowFiles at once.
 * <p>
 * The content is read once to compute the optional header fragment and the fragments of the
 * individual splits; the split FlowFiles are then generated from that computed information.
 * <ul>
 * <li>If the session has no FlowFile available, the method returns without doing anything.</li>
 * <li>If the header contains fewer lines than the configured header line count, or an
 * {@link IllegalStateException} is raised while computing the header, the error is logged and the
 * source FlowFile is transferred to {@code REL_FAILURE}; no splits are generated.</li>
 * <li>Otherwise the FlowFile returned by {@code FragmentAttributes.copyAttributesToOriginal} is
 * transferred to {@code REL_ORIGINAL} and the generated splits, if any, to {@code REL_SPLITS}.</li>
 * </ul>
 *
 * @param context the process context (not read by this method)
 * @param processSession the session used to obtain, read, and transfer FlowFiles
 * @throws ProcessException as declared by the overridden method
 */
@Override
public void onTrigger(ProcessContext context, ProcessSession processSession) throws ProcessException {
    FlowFile sourceFlowFile = processSession.get();
    if (sourceFlowFile == null) {
        return;
    }

    // These holders are populated inside the read callback and consumed after it returns.
    AtomicBoolean error = new AtomicBoolean();
    List<SplitInfo> computedSplitsInfo = new ArrayList<>();
    AtomicReference<SplitInfo> headerSplitInfoRef = new AtomicReference<>();

    processSession.read(sourceFlowFile, new InputStreamCallback() {
        @Override
        public void process(InputStream in) throws IOException {
            TextLineDemarcator demarcator = new TextLineDemarcator(in);
            SplitInfo splitInfo = null;
            long startOffset = 0;

            // Compute fragment representing the header (if available)
            long start = System.nanoTime();
            try {
                if (SplitText.this.headerLineCount > 0) {
                    splitInfo = SplitText.this.computeHeader(demarcator, startOffset, SplitText.this.headerLineCount, null, null);
                    if ((splitInfo != null) && (splitInfo.lineCount < SplitText.this.headerLineCount)) {
                        error.set(true);
                        getLogger().error("Unable to split " + sourceFlowFile + " due to insufficient amount of header lines. Required " + SplitText.this.headerLineCount + " but was " + splitInfo.lineCount + ". Routing to failure.");
                    }
                } else if (SplitText.this.headerMarker != null) {
                    // No fixed header size: let the marker bytes decide which leading lines form the header.
                    splitInfo = SplitText.this.computeHeader(demarcator, startOffset, Long.MAX_VALUE, SplitText.this.headerMarker.getBytes(StandardCharsets.UTF_8), null);
                }
                headerSplitInfoRef.set(splitInfo);
            } catch (IllegalStateException e) {
                error.set(true);
                getLogger().error(e.getMessage() + " Routing to failure.", e);
            }

            // Compute and collect fragments representing the individual splits
            if (!error.get()) {
                SplitInfo headerSplitInfo = headerSplitInfoRef.get();
                if (headerSplitInfo != null) {
                    // Splits begin right after the header fragment.
                    startOffset = headerSplitInfo.length;
                }
                long preAccumulatedLength = startOffset;
                while ((splitInfo = SplitText.this.nextSplit(demarcator, startOffset, SplitText.this.lineCount, splitInfo, preAccumulatedLength)) != null) {
                    computedSplitsInfo.add(splitInfo);
                    startOffset += splitInfo.length;
                }
                long elapsedNanos = System.nanoTime() - start;
                if (getLogger().isDebugEnabled()) {
                    // System.nanoTime() yields nanoseconds; convert so the value matches the unit in the message.
                    getLogger().debug("Computed splits in " + (elapsedNanos / 1_000_000L) + " milliseconds.");
                }
            }
        }
    });

    if (error.get()) {
        processSession.transfer(sourceFlowFile, REL_FAILURE);
    } else {
        // A single fragment identifier is shared by the original and all of its splits.
        final String fragmentId = UUID.randomUUID().toString();
        List<FlowFile> splitFlowFiles = this.generateSplitFlowFiles(fragmentId, sourceFlowFile, headerSplitInfoRef.get(), computedSplitsInfo, processSession);
        final FlowFile originalFlowFile = FragmentAttributes.copyAttributesToOriginal(processSession, sourceFlowFile, fragmentId, splitFlowFiles.size());
        processSession.transfer(originalFlowFile, REL_ORIGINAL);
        if (!splitFlowFiles.isEmpty()) {
            processSession.transfer(splitFlowFiles, REL_SPLITS);
        }
    }
}
Aggregations