Example use of org.apache.nifi.util.search.SearchTerm in the Apache NiFi project.
Class ScanContent, method onTrigger.
/**
 * Scans the content of one incoming FlowFile against the currently loaded search dictionary
 * and routes it according to whether any dictionary term was found.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>If the file watcher's {@code checkAndReset()} returns true, the dictionary is reloaded
 *       with {@code force = true}.</li>
 *   <li>If no search is available yet, a non-forced reload is attempted; when that also yields
 *       nothing the method returns without pulling a FlowFile.</li>
 *   <li>The FlowFile content is streamed through the search. On a match, the first key of the
 *       result map is written (decoded with {@code UTF8}) to the {@code MATCH_ATTRIBUTE_KEY}
 *       attribute and the FlowFile goes to {@code REL_MATCH}; otherwise it goes to
 *       {@code REL_NO_MATCH}.</li>
 * </ol>
 *
 * @param context the process context, passed through to {@code reloadDictionary}
 * @param session the session used to obtain, read, update and transfer the FlowFile
 * @throws ProcessException wrapping any {@link IOException} raised while checking the watcher
 *                          or reloading the dictionary
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog log = getLogger();
    final SynchronousFileWatcher watcher = fileWatcherRef.get();

    final Search<byte[]> activeSearch;
    try {
        // Forced reload whenever the watcher signals that a reset is due.
        if (watcher.checkAndReset()) {
            reloadDictionary(context, true, log);
        }

        // Fall back to a non-forced load if nothing has been published yet.
        Search<byte[]> current = searchRef.get();
        if (current == null && reloadDictionary(context, false, log)) {
            current = searchRef.get();
        }
        activeSearch = current;
    } catch (final IOException ioe) {
        throw new ProcessException(ioe);
    }

    // Without a dictionary there is nothing to scan against; leave the queue untouched.
    if (activeSearch == null) {
        return;
    }

    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    // Holder for the term found inside the callback; stays null when nothing matches.
    final AtomicReference<SearchTerm<byte[]>> firstHit = new AtomicReference<>();
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream buffered = new BufferedInputStream(rawIn)) {
                final SearchState<byte[]> outcome = activeSearch.search(buffered, false);
                if (outcome.foundMatch()) {
                    firstHit.set(outcome.getResults().keySet().iterator().next());
                }
            }
        }
    });

    final SearchTerm<byte[]> hit = firstHit.get();
    if (hit != null) {
        final String hitText = hit.toString(UTF8);
        log.info("Routing {} to 'matched' because it matched term {}", new Object[] { flowFile, hitText });
        flowFile = session.putAttribute(flowFile, MATCH_ATTRIBUTE_KEY, hitText);
        session.getProvenanceReporter().route(flowFile, REL_MATCH);
        session.transfer(flowFile, REL_MATCH);
        return;
    }

    log.info("Routing {} to 'unmatched'", new Object[] { flowFile });
    session.getProvenanceReporter().route(flowFile, REL_NO_MATCH);
    session.transfer(flowFile, REL_NO_MATCH);
}
Example use of org.apache.nifi.util.search.SearchTerm in the Apache NiFi project.
Class ScanContent, method reloadDictionary.
/**
 * Loads every term from the file named by the {@code DICTIONARY} property, builds a new
 * search from those terms and publishes it through {@code searchRef}.
 *
 * <p>The work is done while holding {@code dictionaryUpdateLock}. With {@code force} set the
 * call blocks until the lock is available; otherwise it only tries the lock and gives up
 * immediately if it is not available.
 *
 * @param context the process context supplying the {@code DICTIONARY} and
 *                {@code DICTIONARY_ENCODING} property values
 * @param force   {@code true} to wait for the lock, {@code false} to skip the reload when the
 *                lock cannot be obtained right away
 * @param logger  logger used to report a successful load
 * @return {@code true} if the dictionary was loaded and published, {@code false} if the lock
 *         could not be obtained (only possible when {@code force} is {@code false})
 * @throws IOException if the dictionary file cannot be opened, read or closed
 */
private boolean reloadDictionary(final ProcessContext context, final boolean force, final ComponentLog logger) throws IOException {
    final boolean obtainedLock;
    if (force) {
        dictionaryUpdateLock.lock();
        obtainedLock = true;
    } else {
        obtainedLock = dictionaryUpdateLock.tryLock();
    }
    if (!obtainedLock) {
        return false;
    }

    try {
        final Search<byte[]> search = new AhoCorasick<>();
        final Set<SearchTerm<byte[]>> terms = new HashSet<>();
        final String dictionaryPath = context.getProperty(DICTIONARY).getValue();

        // try-with-resources guarantees the file handle is released even when reading the
        // encoding property or constructing the term loader fails, i.e. before the inner
        // try/finally (which closes the loader) has been entered.
        try (final InputStream inStream = Files.newInputStream(Paths.get(dictionaryPath), StandardOpenOption.READ)) {
            final TermLoader termLoader;
            if (context.getProperty(DICTIONARY_ENCODING).getValue().equalsIgnoreCase(TEXT_ENCODING)) {
                termLoader = new TextualTermLoader(inStream);
            } else {
                termLoader = new BinaryTermLoader(inStream);
            }

            try {
                SearchTerm<byte[]> term;
                while ((term = termLoader.nextTerm()) != null) {
                    terms.add(term);
                }

                search.initializeDictionary(terms);
                searchRef.set(search);
                logger.info("Loaded search dictionary from {}", new Object[] { dictionaryPath });
                return true;
            } finally {
                // The loader presumably closes the wrapped stream as well; the second close
                // issued by try-with-resources is then a no-op per the Closeable contract.
                termLoader.close();
            }
        }
    } finally {
        dictionaryUpdateLock.unlock();
    }
}
Aggregations