Search in sources :

Example 1 with SearchState

Use of org.apache.nifi.util.search.ahocorasick.SearchState in the Apache NiFi project.

Class ScanContent, method onTrigger.

/**
 * Scans the content of a single FlowFile with the current dictionary search and routes
 * the FlowFile according to whether a term was found.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>If the file watcher reports a change, the dictionary is reloaded.</li>
 *   <li>If no search is currently available, one more reload is attempted; when there is
 *       still no search afterwards the method returns without pulling a FlowFile.</li>
 *   <li>One FlowFile is taken from the session; if none is available the method returns.</li>
 *   <li>The FlowFile content is searched. On a match, the first key obtained by iterating
 *       the result key set is recorded, written to the {@code MATCH_ATTRIBUTE_KEY}
 *       attribute, and the FlowFile is routed to {@code REL_MATCH}; otherwise it is
 *       routed to {@code REL_NO_MATCH}. A provenance route event is emitted either way.</li>
 * </ol>
 *
 * @param context the process context handed to {@code reloadDictionary}
 * @param session the session used to obtain, read, update and transfer the FlowFile
 * @throws ProcessException wrapping any {@link IOException} raised while checking the
 *         file watcher or reloading the dictionary
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog log = getLogger();
    final SynchronousFileWatcher watcher = fileWatcherRef.get();

    Search<byte[]> currentSearch;
    try {
        // Pick up dictionary changes reported by the watcher before searching.
        if (watcher.checkAndReset()) {
            reloadDictionary(context, true, log);
        }

        // No search available yet: try one reload and re-read the reference on success.
        currentSearch = searchRef.get();
        if (currentSearch == null && reloadDictionary(context, false, log)) {
            currentSearch = searchRef.get();
        }
    } catch (final IOException ioe) {
        throw new ProcessException(ioe);
    }

    if (currentSearch == null) {
        return;
    }

    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    // The anonymous callback can only capture final locals, hence the copy and the holder.
    final Search<byte[]> activeSearch = currentSearch;
    final AtomicReference<SearchTerm<byte[]>> firstMatch = new AtomicReference<>();
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream buffered = new BufferedInputStream(rawIn)) {
                final SearchState<byte[]> state = activeSearch.search(buffered, false);
                if (!state.foundMatch()) {
                    return;
                }
                // Only one term is reported: whichever the key set iterator yields first.
                firstMatch.set(state.getResults().keySet().iterator().next());
            }
        }
    });

    final SearchTerm<byte[]> hit = firstMatch.get();
    if (hit != null) {
        final String hitText = hit.toString(UTF8);
        log.info("Routing {} to 'matched' because it matched term {}", new Object[] { flowFile, hitText });
        flowFile = session.putAttribute(flowFile, MATCH_ATTRIBUTE_KEY, hitText);
        session.getProvenanceReporter().route(flowFile, REL_MATCH);
        session.transfer(flowFile, REL_MATCH);
        return;
    }

    log.info("Routing {} to 'unmatched'", new Object[] { flowFile });
    session.getProvenanceReporter().route(flowFile, REL_NO_MATCH);
    session.transfer(flowFile, REL_NO_MATCH);
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) SynchronousFileWatcher(org.apache.nifi.util.file.monitor.SynchronousFileWatcher) DataInputStream(java.io.DataInputStream) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) InputStream(java.io.InputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) SearchTerm(org.apache.nifi.util.search.SearchTerm) ComponentLog(org.apache.nifi.logging.ComponentLog) ProcessException(org.apache.nifi.processor.exception.ProcessException) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) SearchState(org.apache.nifi.util.search.ahocorasick.SearchState) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback)

Aggregations

DataInputStream (java.io.DataInputStream)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 FlowFile (org.apache.nifi.flowfile.FlowFile)1 ComponentLog (org.apache.nifi.logging.ComponentLog)1 ProcessException (org.apache.nifi.processor.exception.ProcessException)1 InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback)1 BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream)1 SynchronousFileWatcher (org.apache.nifi.util.file.monitor.SynchronousFileWatcher)1 SearchTerm (org.apache.nifi.util.search.SearchTerm)1 SearchState (org.apache.nifi.util.search.ahocorasick.SearchState)1