Examples with TextLineDemarcator - org.apache.nifi.stream.io.util.TextLineDemarcator

Example 1 with TextLineDemarcator

Use of org.apache.nifi.stream.io.util.TextLineDemarcator in the Apache NiFi project.

The onTrigger method of the SplitText class.

/**
 * Splits the content of the incoming FlowFile and releases all resulting splits at once.
 * <p>
 * The content is scanned with a single {@code TextLineDemarcator}. An optional header fragment is
 * computed first: by line count when {@code headerLineCount > 0}, otherwise by {@code headerMarker}
 * when one is set. The remaining content is then divided into fragments by repeated calls to
 * {@code nextSplit}.
 * <p>
 * Routing: if the header has fewer lines than required, or header computation throws an
 * {@link IllegalStateException}, the source FlowFile is transferred to {@code REL_FAILURE}.
 * Otherwise the original (with fragment attributes copied onto it) goes to {@code REL_ORIGINAL}
 * and any generated splits go to {@code REL_SPLITS}.
 *
 * @param context        the process context; not read by this method
 * @param processSession the session used to obtain, read and transfer FlowFiles
 * @throws ProcessException as declared by the overridden method
 */
@Override
public void onTrigger(ProcessContext context, ProcessSession processSession) throws ProcessException {
    FlowFile sourceFlowFile = processSession.get();
    if (sourceFlowFile == null) {
        // Nothing queued for this invocation.
        return;
    }
    // Holders are used because the anonymous callback below can only capture effectively-final locals.
    AtomicBoolean error = new AtomicBoolean();
    List<SplitInfo> computedSplitsInfo = new ArrayList<>();
    AtomicReference<SplitInfo> headerSplitInfoRef = new AtomicReference<>();
    processSession.read(sourceFlowFile, new InputStreamCallback() {

        @Override
        public void process(InputStream in) throws IOException {
            TextLineDemarcator demarcator = new TextLineDemarcator(in);
            SplitInfo splitInfo = null;
            long startOffset = 0;
            // Compute fragment representing the header (if available)
            long start = System.nanoTime();
            try {
                if (SplitText.this.headerLineCount > 0) {
                    splitInfo = SplitText.this.computeHeader(demarcator, startOffset, SplitText.this.headerLineCount, null, null);
                    if ((splitInfo != null) && (splitInfo.lineCount < SplitText.this.headerLineCount)) {
                        error.set(true);
                        getLogger().error("Unable to split " + sourceFlowFile + " due to insufficient amount of header lines. Required " + SplitText.this.headerLineCount + " but was " + splitInfo.lineCount + ". Routing to failure.");
                    }
                } else if (SplitText.this.headerMarker != null) {
                    // No fixed header size: let the marker bytes decide which lines belong to the header.
                    splitInfo = SplitText.this.computeHeader(demarcator, startOffset, Long.MAX_VALUE, SplitText.this.headerMarker.getBytes(StandardCharsets.UTF_8), null);
                }
                headerSplitInfoRef.set(splitInfo);
            } catch (IllegalStateException e) {
                error.set(true);
                getLogger().error(e.getMessage() + " Routing to failure.", e);
            }
            // Compute and collect fragments representing the individual splits
            if (!error.get()) {
                if (headerSplitInfoRef.get() != null) {
                    // Splits begin right after the header fragment.
                    startOffset = headerSplitInfoRef.get().length;
                }
                long preAccumulatedLength = startOffset;
                while ((splitInfo = SplitText.this.nextSplit(demarcator, startOffset, SplitText.this.lineCount, splitInfo, preAccumulatedLength)) != null) {
                    computedSplitsInfo.add(splitInfo);
                    startOffset += splitInfo.length;
                }
                long stop = System.nanoTime();
                if (getLogger().isDebugEnabled()) {
                    // System.nanoTime() yields nanoseconds; convert so the value matches the unit in the message.
                    long elapsedMillis = (stop - start) / 1_000_000L;
                    getLogger().debug("Computed splits in " + elapsedMillis + " milliseconds.");
                }
            }
        }
    });
    if (error.get()) {
        processSession.transfer(sourceFlowFile, REL_FAILURE);
    } else {
        // All fragments produced from one source share a single fragment identifier.
        final String fragmentId = UUID.randomUUID().toString();
        List<FlowFile> splitFlowFiles = this.generateSplitFlowFiles(fragmentId, sourceFlowFile, headerSplitInfoRef.get(), computedSplitsInfo, processSession);
        final FlowFile originalFlowFile = FragmentAttributes.copyAttributesToOriginal(processSession, sourceFlowFile, fragmentId, splitFlowFiles.size());
        processSession.transfer(originalFlowFile, REL_ORIGINAL);
        if (!splitFlowFiles.isEmpty()) {
            processSession.transfer(splitFlowFiles, REL_SPLITS);
        }
    }
}

Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) TextLineDemarcator(org.apache.nifi.stream.io.util.TextLineDemarcator) ArrayList(java.util.ArrayList) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback)

Aggregations

IOException (java.io.IOException): 1, InputStream (java.io.InputStream): 1, ArrayList (java.util.ArrayList): 1, AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 1, AtomicReference (java.util.concurrent.atomic.AtomicReference): 1, FlowFile (org.apache.nifi.flowfile.FlowFile): 1, InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 1, TextLineDemarcator (org.apache.nifi.stream.io.util.TextLineDemarcator): 1