use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.
the class TestPersistentProvenanceRepository method testModifyIndexWhileSearching.
/**
 * Runs two provenance queries in background threads and, while both are still waiting inside
 * {@code borrowIndexSearcher}, registers more events and forces a rollover so that the index is
 * modified underneath them. The test passes when both searches complete without error before the
 * test timeout expires.
 *
 * <p>The test is skipped on Windows (see the {@code assumeFalse} guard).</p>
 *
 * @throws IOException if the repository cannot be initialized or rolled over
 * @throws InterruptedException if the test thread is interrupted while waiting on a latch
 * @throws ParseException declared by the original test signature
 */
@Test(timeout = 10000)
public void testModifyIndexWhileSearching() throws IOException, InterruptedException, ParseException {
    assumeFalse(isWindowsEnvironment());

    final RepositoryConfiguration config = createConfiguration();
    config.setMaxRecordLife(30, TimeUnit.SECONDS);
    config.setMaxStorageCapacity(1024L * 1024L * 10);
    config.setMaxEventFileLife(500, TimeUnit.MILLISECONDS);
    config.setMaxEventFileCapacity(1024L * 1024L * 10);
    config.setSearchableFields(new ArrayList<>(SearchableFields.getStandardFields()));

    // Reaches zero once two index searchers have been borrowed (one per search thread).
    final CountDownLatch obtainIndexSearcherLatch = new CountDownLatch(2);

    repo = new PersistentProvenanceRepository(config, DEFAULT_ROLLOVER_MILLIS) {
        private CachingIndexManager wrappedManager = null;

        // Create an IndexManager that adds a delay before returning the Index Searcher.
        @Override
        protected synchronized CachingIndexManager getIndexManager() {
            if (wrappedManager == null) {
                final IndexManager mgr = super.getIndexManager();
                final Logger logger = LoggerFactory.getLogger("IndexManager");

                wrappedManager = new CachingIndexManager() {
                    final AtomicInteger indexSearcherCount = new AtomicInteger(0);

                    @Override
                    public EventIndexSearcher borrowIndexSearcher(File indexDir) throws IOException {
                        final EventIndexSearcher searcher = mgr.borrowIndexSearcher(indexDir);
                        final int idx = indexSearcherCount.incrementAndGet();
                        obtainIndexSearcherLatch.countDown();

                        // Hold on to the borrowed searcher for a while before handing it to the caller:
                        // 3 seconds for the first borrower, 5 seconds for every later one. This keeps the
                        // searchers checked out while the main thread modifies the index.
                        try {
                            if (idx == 1) {
                                Thread.sleep(3000L);
                            } else {
                                Thread.sleep(5000L);
                            }
                        } catch (InterruptedException e) {
                            // Restore the interrupt flag so callers further up the stack can still observe it.
                            Thread.currentThread().interrupt();
                            throw new IOException("Interrupted", e);
                        }

                        logger.info("Releasing index searcher");
                        return searcher;
                    }

                    @Override
                    public EventIndexWriter borrowIndexWriter(File indexingDirectory) throws IOException {
                        return mgr.borrowIndexWriter(indexingDirectory);
                    }

                    @Override
                    public void close() throws IOException {
                        mgr.close();
                    }

                    @Override
                    public boolean removeIndex(File indexDirectory) {
                        mgr.removeIndex(indexDirectory);
                        return true;
                    }

                    @Override
                    public void returnIndexSearcher(EventIndexSearcher searcher) {
                        mgr.returnIndexSearcher(searcher);
                    }

                    @Override
                    public void returnIndexWriter(EventIndexWriter writer) {
                        mgr.returnIndexWriter(writer);
                    }
                };
            }

            return wrappedManager;
        }
    };

    repo.initialize(getEventReporter(), null, null, IdentifierLookup.EMPTY);

    final String uuid = "10000000-0000-0000-0000-000000000000";
    final Map<String, String> attributes = new HashMap<>();
    attributes.put("abc", "xyz");
    attributes.put("xyz", "abc");
    attributes.put("filename", "file-" + uuid);

    final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventTime(System.currentTimeMillis());
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    attributes.put("uuid", uuid);
    builder.fromFlowFile(createFlowFile(3L, 3000L, attributes));
    builder.setComponentId("1234");
    builder.setComponentType("dummy processor");

    // Register an initial batch of events and wait for them to be rolled over.
    for (int i = 0; i < 10; i++) {
        builder.fromFlowFile(createFlowFile(i, 3000L, attributes));
        attributes.put("uuid", "00000000-0000-0000-0000-00000000000" + i);
        repo.registerEvent(builder.build());
    }

    repo.waitForRollover();

    // Build the query that the background threads will execute.
    final Query query = new Query(UUID.randomUUID().toString());
    query.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.Filename, "file-*"));
    query.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.ComponentID, "12?4"));
    query.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.TransitURI, "nifi://*"));
    query.setMaxResults(100);

    // Run the query in background threads. When a thread goes to obtain the IndexSearcher, it is delayed
    // by 3 or 5 seconds (see the wrapped index manager above). That delay occurs while the main thread is
    // updating the index. The searches are still expected to complete successfully.
    final int numThreads = 2;
    final CountDownLatch performSearchLatch = new CountDownLatch(numThreads);

    // First failure seen by any search thread. An assertion raised on a background thread cannot fail the
    // test by itself, so the failure is recorded here and re-thrown on the test thread below.
    final java.util.concurrent.atomic.AtomicReference<Throwable> searchFailure = new java.util.concurrent.atomic.AtomicReference<>();

    final Runnable searchRunnable = new Runnable() {
        @Override
        public void run() {
            try {
                final QueryResult result = repo.queryEvents(query, createUser());
                System.out.println("Finished search: " + result);
            } catch (final Throwable t) {
                searchFailure.compareAndSet(null, t);
            } finally {
                // Always release the main thread, even on failure, so it can report the recorded cause
                // instead of waiting for the test timeout.
                performSearchLatch.countDown();
            }
        }
    };

    // Kick off the searcher threads
    for (int i = 0; i < numThreads; i++) {
        final Thread searchThread = new Thread(searchRunnable);
        searchThread.start();
    }

    // Wait until we've obtained the Index Searchers before modifying the index.
    obtainIndexSearcherLatch.await();

    // add more events to the repo
    for (int i = 0; i < 10; i++) {
        builder.fromFlowFile(createFlowFile(i, 3000L, attributes));
        attributes.put("uuid", "00000000-0000-0000-0000-00000000000" + i);
        repo.registerEvent(builder.build());
    }

    // Force a rollover to occur. This will modify the index.
    repo.rolloverWithLock(true);

    // Wait for the repository to roll over.
    repo.waitForRollover();

    // Wait for the searches to complete.
    performSearchLatch.await();

    final Throwable failure = searchFailure.get();
    if (failure != null) {
        throw new AssertionError("Provenance search failed in background thread: " + failure, failure);
    }
}
use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.
the class TestCachingIndexManager method test.
/**
 * Borrows index searchers from the manager before and while an index writer is open for the same
 * directory, and checks that each searcher keeps returning the single indexed document, including
 * after the writer has been returned.
 *
 * @throws IOException if borrowing a writer or searcher, or indexing the document, fails
 */
@Test
public void test() throws IOException {
    // Seed the index: borrow a writer, index one document and return the writer,
    // so that there is something to query.
    final EventIndexWriter seedWriter = manager.borrowIndexWriter(indexDir);
    final Document seedDocument = new Document();
    seedDocument.add(new StringField("unit test", "true", Store.YES));
    seedWriter.index(seedDocument, 1000);
    manager.returnIndexWriter(seedWriter);

    // Borrow a searcher while no writer is open and confirm it sees the document.
    final EventIndexSearcher searcherWithoutWriter = manager.borrowIndexSearcher(indexDir);
    assertCount(searcherWithoutWriter, 1);

    // With that searcher still checked out, open a writer on the same index.
    // This is expected to mark the existing searcher as poisoned.
    final EventIndexWriter concurrentWriter = manager.borrowIndexWriter(indexDir);

    // A searcher borrowed while the writer is open must be a different instance from the first one:
    // it is expected to be a Near-Real-Time searcher, whereas the first one is not.
    final EventIndexSearcher searcherWithWriter = manager.borrowIndexSearcher(indexDir);
    assertNotSame(searcherWithoutWriter, searcherWithWriter);

    // The new searcher must see the same single document.
    assertCount(searcherWithWriter, 1);

    // Hand the writer back, leaving the index without an active writer.
    manager.returnIndexWriter(concurrentWriter);

    // The first searcher must still produce the same result; then return it.
    assertCount(searcherWithoutWriter, 1);
    manager.returnIndexSearcher(searcherWithoutWriter);

    // Likewise for the searcher obtained while the writer was open.
    assertCount(searcherWithWriter, 1);
    manager.returnIndexSearcher(searcherWithWriter);
}
use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.
the class LuceneEventIndex method getMaxEventId.
/**
 * Determines the (approximate) largest event ID that has been indexed for the given partition.
 *
 * <p>The index directories for the partition are visited newest first. For each one, the document
 * with the highest Lucene document ID is read and its Identifier field is returned as the event ID.
 * Directories that cannot be opened, cannot be read, or contain no documents are skipped and the
 * next directory is tried.</p>
 *
 * @param partitionName the partition whose index directories should be inspected
 * @return the event ID found in the newest usable index, or {@code -1} if the partition has no
 *         index directories or none of them yields an event ID
 */
long getMaxEventId(final String partitionName) {
    final List<File> allDirectories = getDirectoryManager().getDirectories(0L, Long.MAX_VALUE, partitionName);
    if (allDirectories.isEmpty()) {
        return -1L;
    }

    Collections.sort(allDirectories, DirectoryUtils.NEWEST_INDEX_FIRST);

    for (final File directory : allDirectories) {
        final EventIndexSearcher searcher;
        try {
            searcher = indexManager.borrowIndexSearcher(directory);
        } catch (final IOException ioe) {
            // Include the exception so the cause is visible, as the read-failure branch below already does.
            logger.warn("Unable to read from Index Directory {}. Will assume that the index is incomplete and not consider this index when determining max event ID", directory, ioe);
            continue;
        }

        try {
            final IndexReader reader = searcher.getIndexSearcher().getIndexReader();
            final int maxDocId = reader.maxDoc() - 1;
            if (maxDocId < 0) {
                // An index without documents has no event ID to offer; asking the reader for document -1
                // would throw an unchecked exception, so move on to the next (older) index instead.
                logger.info("Index Directory {} contains no documents. Will not consider this index when determining max event ID", directory);
                continue;
            }

            final Document document = reader.document(maxDocId);
            final long eventId = document.getField(SearchableFields.Identifier.getSearchableFieldName()).numericValue().longValue();
            logger.info("Determined that Max Event ID indexed for Partition {} is approximately {} based on index {}", partitionName, eventId, directory);
            return eventId;
        } catch (final IOException ioe) {
            logger.warn("Unable to search Index Directory {}. Will assume that the index is incomplete and not consider this index when determining max event ID", directory, ioe);
        } finally {
            // Runs on every exit path (return, continue, exception) so the searcher is always handed back.
            indexManager.returnIndexSearcher(searcher);
        }
    }

    return -1L;
}
use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.
the class QueryTask method run.
/**
 * Queries the Lucene index in {@code indexDir} and feeds the matching events into {@code queryResult}.
 *
 * <p>Before each stage (borrowing the searcher, querying Lucene, reading the documents) the task
 * re-checks whether the maximum number of results has already been reached or the query has
 * finished, and stops early if so. A missing index is treated as an empty result; any other
 * failure is recorded on the query result as an error. The borrowed searcher is always returned.</p>
 */
@Override
public void run() {
    // Stage 0: bail out before touching the index if there is nothing left to do.
    if (queryResult.getTotalHitCount() >= maxResults) {
        logger.debug("Will not query lucene index {} because maximum results have already been obtained", indexDir);
        queryResult.update(Collections.emptyList(), 0L);
        return;
    }

    if (queryResult.isFinished()) {
        logger.debug("Will not query lucene index {} because the query is already finished", indexDir);
        return;
    }

    final long borrowStartNanos = System.nanoTime();
    final EventIndexSearcher indexSearcher;
    try {
        indexSearcher = indexManager.borrowIndexSearcher(indexDir);
    } catch (final FileNotFoundException fnfe) {
        // Not an error: the index may have aged off and been deleted, or it was just created and the
        // writer has not been committed yet. Report this index as searched, with no results.
        queryResult.update(Collections.emptyList(), 0);

        logger.info("Attempted to search Provenance Index {} but could not find the directory or the directory did not contain a valid Lucene index. " + "This usually indicates that either the index was just created and hasn't fully been initialized, or that the index was recently aged off.", indexDir);
        return;
    } catch (final IOException ioe) {
        queryResult.setError("Failed to query index " + indexDir + "; see logs for more details");
        logger.error("Failed to query index " + indexDir, ioe);
        return;
    }

    try {
        logger.debug("Borrowing index searcher for {} took {} ms", indexDir, TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - borrowStartNanos));
        final long queryStartNanos = System.nanoTime();

        // Stage 1: borrowing may have taken a while, so check again before querying Lucene.
        if (queryResult.getTotalHitCount() >= maxResults) {
            logger.debug("Will not query lucene index {} because maximum results have already been obtained", indexDir);
            queryResult.update(Collections.emptyList(), 0L);
            return;
        }

        if (queryResult.isFinished()) {
            logger.debug("Will not query lucene index {} because the query is already finished", indexDir);
            return;
        }

        // Run the Lucene search; the elapsed time is logged whether or not it succeeds.
        final IndexReader reader = indexSearcher.getIndexSearcher().getIndexReader();
        final TopDocs hits;
        try {
            hits = indexSearcher.getIndexSearcher().search(query, maxResults);
        } catch (final Exception e) {
            logger.error("Failed to query Lucene for index " + indexDir, e);
            queryResult.setError("Failed to query Lucene for index " + indexDir + " due to " + e);
            return;
        } finally {
            final long luceneMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - queryStartNanos);
            logger.debug("Querying Lucene for index {} took {} ms", indexDir, luceneMillis);
        }

        // Stage 2: check once more before reading the matching documents.
        if (queryResult.getTotalHitCount() >= maxResults) {
            logger.debug("Will not read events from store for {} because maximum results have already been obtained", indexDir);
            queryResult.update(Collections.emptyList(), 0L);
            return;
        }

        if (queryResult.isFinished()) {
            logger.debug("Will not read events from store for {} because the query has already finished", indexDir);
            return;
        }

        final Tuple<List<ProvenanceEventRecord>, Integer> eventsAndTotalHits = readDocuments(hits, reader);

        if (eventsAndTotalHits != null) {
            queryResult.update(eventsAndTotalHits.getKey(), eventsAndTotalHits.getValue());

            final long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - queryStartNanos);
            logger.info("Successfully queried index {} for query {}; retrieved {} events with a total of {} hits in {} millis", indexDir, query, eventsAndTotalHits.getKey().size(), eventsAndTotalHits.getValue(), elapsedMillis);
        } else {
            // A null tuple means no events were read; still mark this index as processed.
            queryResult.update(Collections.emptyList(), 0L);
            logger.info("Will not update query results for queried index {} for query {} because the maximum number of results have been reached already", indexDir, query);
        }
    } catch (final Exception e) {
        logger.error("Failed to query events against index " + indexDir, e);
        queryResult.setError("Failed to complete query due to " + e);
    } finally {
        indexManager.returnIndexSearcher(indexSearcher);
    }
}
use of org.apache.nifi.provenance.index.EventIndexSearcher in project nifi by apache.
the class IndexSearch method search.
/**
 * Searches the provenance index in {@code indexDirectory} for events matching the given query.
 *
 * <p>Note that the supplied query is modified: its start date is raised to
 * {@code firstEventTimestamp} when it is unset or earlier than that, and its end date is set to
 * the current time when it is unset.</p>
 *
 * @param provenanceQuery the query to run; its start and end dates may be updated as described above
 * @param user the user on whose behalf events are authorized
 * @param retrievedCount running count of retrieved events; the search is skipped when it has
 *        already reached the query's maximum, and it is handed to the document reader
 * @param firstEventTimestamp lower bound applied to the query's start date
 * @return the query result, which is empty when the search is skipped, nothing matches, or the
 *         index files cannot be found
 * @throws IOException if the index directory cannot be created, is not a directory, or the
 *         search fails for a reason other than a missing file
 */
public StandardQueryResult search(final org.apache.nifi.provenance.search.Query provenanceQuery, final NiFiUser user, final AtomicInteger retrievedCount, final long firstEventTimestamp) throws IOException {
    // Nothing to do if the caller already has as many results as it asked for.
    if (retrievedCount.get() >= provenanceQuery.getMaxResults()) {
        final StandardQueryResult skippedResult = new StandardQueryResult(provenanceQuery, 1);
        skippedResult.update(Collections.<ProvenanceEventRecord>emptyList(), 0L);

        logger.info("Skipping search of Provenance Index {} for {} because the max number of results ({}) has already been retrieved", indexDirectory, provenanceQuery, provenanceQuery.getMaxResults());

        return skippedResult;
    }

    final long startNanos = System.nanoTime();

    if (!indexDirectory.exists() && !indexDirectory.mkdirs()) {
        throw new IOException("Unable to create Indexing Directory " + indexDirectory);
    }
    if (!indexDirectory.isDirectory()) {
        throw new IOException("Indexing Directory specified is " + indexDirectory + ", but this is not a directory");
    }

    final StandardQueryResult result = new StandardQueryResult(provenanceQuery, 1);

    // Never search earlier than firstEventTimestamp: events from before that point should not
    // count toward the total number of matches.
    if (provenanceQuery.getStartDate() == null || provenanceQuery.getStartDate().getTime() < firstEventTimestamp) {
        provenanceQuery.setStartDate(new Date(firstEventTimestamp));
    }

    // An open-ended query is bounded at "now".
    if (provenanceQuery.getEndDate() == null) {
        provenanceQuery.setEndDate(new Date());
    }

    final Query luceneQuery = LuceneUtil.convertQuery(provenanceQuery);

    final long borrowStartNanos = System.nanoTime();
    EventIndexSearcher searcher = null;
    try {
        searcher = indexManager.borrowIndexSearcher(indexDirectory);
        final long searchStartNanos = System.nanoTime();
        final long openSearcherNanos = searchStartNanos - borrowStartNanos;

        logger.debug("Searching {} for {}", this, provenanceQuery);
        final TopDocs topDocs = searcher.getIndexSearcher().search(luceneQuery, provenanceQuery.getMaxResults());
        final long searchEndNanos = System.nanoTime();
        final long searchNanos = searchEndNanos - searchStartNanos;

        logger.debug("Searching {} for {} took {} millis; opening searcher took {} millis", this, provenanceQuery, TimeUnit.NANOSECONDS.toMillis(searchNanos), TimeUnit.NANOSECONDS.toMillis(openSearcherNanos));

        if (topDocs.totalHits == 0) {
            result.update(Collections.<ProvenanceEventRecord>emptyList(), 0);
            return result;
        }

        final DocsReader docsReader = new DocsReader();

        // Authorizer that delegates every decision to the repository, bound to the requesting user.
        final EventAuthorizer authorizer = new EventAuthorizer() {
            @Override
            public boolean isAuthorized(ProvenanceEventRecord event) {
                return repository.isAuthorized(event, user);
            }

            @Override
            public void authorize(ProvenanceEventRecord event) throws AccessDeniedException {
                repository.authorize(event, user);
            }

            @Override
            public List<ProvenanceEventRecord> filterUnauthorizedEvents(List<ProvenanceEventRecord> events) {
                return repository.filterUnauthorizedEvents(events, user);
            }

            @Override
            public Set<ProvenanceEventRecord> replaceUnauthorizedWithPlaceholders(Set<ProvenanceEventRecord> events) {
                return repository.replaceUnauthorizedWithPlaceholders(events, user);
            }
        };

        final Set<ProvenanceEventRecord> matchingRecords = docsReader.read(topDocs, authorizer, searcher.getIndexSearcher().getIndexReader(), repository.getAllLogFiles(), retrievedCount, provenanceQuery.getMaxResults(), maxAttributeChars);

        final long readRecordsNanos = System.nanoTime() - searchEndNanos;
        logger.debug("Reading {} records took {} millis for {}", matchingRecords.size(), TimeUnit.NANOSECONDS.toMillis(readRecordsNanos), this);

        result.update(matchingRecords, topDocs.totalHits);

        final long totalNanos = System.nanoTime() - startNanos;
        logger.info("Successfully executed {} against Index {}; Search took {} milliseconds; Total Hits = {}", provenanceQuery, indexDirectory, TimeUnit.NANOSECONDS.toMillis(totalNanos), topDocs.totalHits);

        return result;
    } catch (final FileNotFoundException e) {
        // nothing has been indexed yet, or the data has already aged off
        logger.warn("Attempted to search Provenance Index {} but could not find the file due to {}", indexDirectory, e);
        if (logger.isDebugEnabled()) {
            logger.warn("", e);
        }

        result.update(Collections.<ProvenanceEventRecord>emptyList(), 0);
        return result;
    } finally {
        // The searcher is null if borrowing it failed; only return what was actually borrowed.
        if (searcher != null) {
            indexManager.returnIndexSearcher(searcher);
        }
    }
}
Aggregations