Search in sources :

Example 1 with SearchTerm

Use of org.apache.nifi.util.search.SearchTerm in the Apache NiFi project.

Class ScanContent, method onTrigger.

/**
 * Scans the content of one FlowFile against the current search dictionary and routes it.
 *
 * <p>The dictionary is reloaded (forced) when the file watcher reports a change, and loaded
 * on demand (non-forced) when no search instance is available yet. If no search instance can
 * be obtained, or the session yields no FlowFile, the method returns without doing anything.
 *
 * <p>A FlowFile whose content matches is given the {@code MATCH_ATTRIBUTE_KEY} attribute
 * holding the matching term and is transferred to {@code REL_MATCH}; otherwise it is
 * transferred to {@code REL_NO_MATCH}.
 *
 * @param context the process context, passed through to the dictionary reload
 * @param session the session used to obtain, read and transfer the FlowFile
 * @throws ProcessException wrapping any IOException raised while checking the watcher or
 *         reloading the dictionary
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog log = getLogger();
    final SynchronousFileWatcher watcher = fileWatcherRef.get();

    Search<byte[]> currentSearch;
    try {
        // A change reported by the watcher triggers a forced reload.
        if (watcher.checkAndReset()) {
            reloadDictionary(context, true, log);
        }

        // No search available yet: attempt a non-forced load and re-read the reference on success.
        currentSearch = searchRef.get();
        if (currentSearch == null && reloadDictionary(context, false, log)) {
            currentSearch = searchRef.get();
        }
    } catch (final IOException ioe) {
        throw new ProcessException(ioe);
    }

    if (currentSearch == null) {
        return;
    }

    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    // The anonymous callback can only capture final locals.
    final Search<byte[]> searchForCallback = currentSearch;
    final AtomicReference<SearchTerm<byte[]>> matchHolder = new AtomicReference<>(null);

    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream buffered = new BufferedInputStream(rawIn)) {
                final SearchState<byte[]> outcome = searchForCallback.search(buffered, false);
                if (outcome.foundMatch()) {
                    // Keep the first term handed back by the result key set's iterator.
                    matchHolder.set(outcome.getResults().keySet().iterator().next());
                }
            }
        }
    });

    final SearchTerm<byte[]> hit = matchHolder.get();
    if (hit != null) {
        final String hitText = hit.toString(UTF8);
        log.info("Routing {} to 'matched' because it matched term {}", new Object[] { flowFile, hitText });
        flowFile = session.putAttribute(flowFile, MATCH_ATTRIBUTE_KEY, hitText);
        session.getProvenanceReporter().route(flowFile, REL_MATCH);
        session.transfer(flowFile, REL_MATCH);
        return;
    }

    log.info("Routing {} to 'unmatched'", new Object[] { flowFile });
    session.getProvenanceReporter().route(flowFile, REL_NO_MATCH);
    session.transfer(flowFile, REL_NO_MATCH);
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) SynchronousFileWatcher(org.apache.nifi.util.file.monitor.SynchronousFileWatcher) DataInputStream(java.io.DataInputStream) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) InputStream(java.io.InputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) SearchTerm(org.apache.nifi.util.search.SearchTerm) ComponentLog(org.apache.nifi.logging.ComponentLog) ProcessException(org.apache.nifi.processor.exception.ProcessException) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) SearchState(org.apache.nifi.util.search.ahocorasick.SearchState) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback)

Example 2 with SearchTerm

Use of org.apache.nifi.util.search.SearchTerm in the Apache NiFi project.

Class ScanContent, method reloadDictionary.

/**
 * Rebuilds the search dictionary from the file named by the {@code DICTIONARY} property and
 * publishes the new search instance through {@code searchRef}.
 *
 * <p>With {@code force} set, the call blocks until {@code dictionaryUpdateLock} is acquired.
 * Otherwise it uses {@code tryLock()} and returns {@code false} immediately if the lock is
 * not available.
 *
 * @param context the process context supplying the {@code DICTIONARY} and
 *        {@code DICTIONARY_ENCODING} property values
 * @param force {@code true} to wait for the lock, {@code false} to give up if it is held
 * @param logger the logger that receives the "loaded" message
 * @return {@code true} if the dictionary was loaded and stored in {@code searchRef};
 *         {@code false} if the lock could not be obtained
 * @throws IOException if the dictionary file cannot be opened or read
 */
private boolean reloadDictionary(final ProcessContext context, final boolean force, final ComponentLog logger) throws IOException {
    final boolean obtainedLock;
    if (force) {
        dictionaryUpdateLock.lock();
        obtainedLock = true;
    } else {
        obtainedLock = dictionaryUpdateLock.tryLock();
    }
    if (!obtainedLock) {
        return false;
    }

    try {
        final String dictionaryPath = context.getProperty(DICTIONARY).getValue();
        final Search<byte[]> search = new AhoCorasick<>();
        final Set<SearchTerm<byte[]>> terms = new HashSet<>();

        // try-with-resources guarantees the file stream is closed even if reading the encoding
        // property or constructing the term loader throws before the inner try is entered.
        try (final InputStream inStream = Files.newInputStream(Paths.get(dictionaryPath), StandardOpenOption.READ)) {
            final TermLoader termLoader;
            if (context.getProperty(DICTIONARY_ENCODING).getValue().equalsIgnoreCase(TEXT_ENCODING)) {
                termLoader = new TextualTermLoader(inStream);
            } else {
                termLoader = new BinaryTermLoader(inStream);
            }
            try {
                SearchTerm<byte[]> term;
                while ((term = termLoader.nextTerm()) != null) {
                    terms.add(term);
                }
                search.initializeDictionary(terms);
                searchRef.set(search);
                logger.info("Loaded search dictionary from {}", new Object[] { dictionaryPath });
                return true;
            } finally {
                // The loader presumably closes the wrapped stream as well (TODO confirm); the
                // Closeable contract makes a second close of an already-closed stream a no-op.
                termLoader.close();
            }
        }
    } finally {
        dictionaryUpdateLock.unlock();
    }
}
Also used : DataInputStream(java.io.DataInputStream) BufferedInputStream(org.apache.nifi.stream.io.BufferedInputStream) InputStream(java.io.InputStream) AhoCorasick(org.apache.nifi.util.search.ahocorasick.AhoCorasick) SearchTerm(org.apache.nifi.util.search.SearchTerm) HashSet(java.util.HashSet)

Aggregations

DataInputStream (java.io.DataInputStream)2 InputStream (java.io.InputStream)2 BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream)2 SearchTerm (org.apache.nifi.util.search.SearchTerm)2 IOException (java.io.IOException)1 HashSet (java.util.HashSet)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 FlowFile (org.apache.nifi.flowfile.FlowFile)1 ComponentLog (org.apache.nifi.logging.ComponentLog)1 ProcessException (org.apache.nifi.processor.exception.ProcessException)1 InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback)1 SynchronousFileWatcher (org.apache.nifi.util.file.monitor.SynchronousFileWatcher)1 AhoCorasick (org.apache.nifi.util.search.ahocorasick.AhoCorasick)1 SearchState (org.apache.nifi.util.search.ahocorasick.SearchState)1