Use of org.apache.nifi.provenance.index.EventIndexWriter in the Apache NiFi project.
From the class SimpleIndexManager, method createWriter:
private IndexWriterCount createWriter(final File indexDirectory) throws IOException {
    final List<Closeable> closeables = new ArrayList<>();
    final Directory directory = FSDirectory.open(indexDirectory);
    closeables.add(directory);

    try {
        final Analyzer analyzer = new StandardAnalyzer();
        closeables.add(analyzer);

        final IndexWriterConfig config = new IndexWriterConfig(LuceneUtil.LUCENE_VERSION, analyzer);

        final ConcurrentMergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
        final int mergeThreads = repoConfig.getConcurrentMergeThreads();
        mergeScheduler.setMaxMergesAndThreads(mergeThreads, mergeThreads);
        config.setMergeScheduler(mergeScheduler);

        final IndexWriter indexWriter = new IndexWriter(directory, config);
        final EventIndexWriter eventIndexWriter = new LuceneEventIndexWriter(indexWriter, indexDirectory);

        final IndexWriterCount writerCount = new IndexWriterCount(eventIndexWriter, analyzer, directory, 1, false);
        logger.debug("Providing new index writer for {}", indexDirectory);
        return writerCount;
    } catch (final IOException ioe) {
        for (final Closeable closeable : closeables) {
            try {
                closeable.close();
            } catch (final IOException ioe2) {
                ioe.addSuppressed(ioe2);
            }
        }

        throw ioe;
    }
}
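As a usage sketch (not part of the NiFi source): callers do not invoke createWriter directly; they go through the borrow/return API shown in the tests below, which reference-counts the writer per index directory. The index path and document contents here are illustrative only.

// Hypothetical caller; the index path is illustrative, not from the NiFi source.
final SimpleIndexManager mgr = new SimpleIndexManager(new RepositoryConfiguration());
final File indexDir = new File("target/provenance-index");

final EventIndexWriter writer = mgr.borrowIndexWriter(indexDir); // first borrow triggers createWriter()
try {
    final Document doc = new Document();
    doc.add(new StringField("id", "42", Store.YES));
    writer.index(doc, 1000); // same commit threshold used by the tests below
} finally {
    mgr.returnIndexWriter(writer); // decrements the count; the writer closes once it reaches zero
}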
Use of org.apache.nifi.provenance.index.EventIndexWriter in the Apache NiFi project.
From the class TestSimpleIndexManager, method testMultipleWritersSimultaneouslySameIndex:
@Test
public void testMultipleWritersSimultaneouslySameIndex() throws IOException {
    final SimpleIndexManager mgr = new SimpleIndexManager(new RepositoryConfiguration());
    final File dir = new File("target/" + UUID.randomUUID().toString());
    try {
        final EventIndexWriter writer1 = mgr.borrowIndexWriter(dir);
        final EventIndexWriter writer2 = mgr.borrowIndexWriter(dir);

        final Document doc1 = new Document();
        doc1.add(new StringField("id", "1", Store.YES));

        final Document doc2 = new Document();
        doc2.add(new StringField("id", "2", Store.YES));

        writer1.index(doc1, 1000);
        writer2.index(doc2, 1000);
        mgr.returnIndexWriter(writer2);
        mgr.returnIndexWriter(writer1);

        final EventIndexSearcher searcher = mgr.borrowIndexSearcher(dir);
        final TopDocs topDocs = searcher.getIndexSearcher().search(new MatchAllDocsQuery(), 2);
        assertEquals(2, topDocs.totalHits);
        mgr.returnIndexSearcher(searcher);
    } finally {
        FileUtils.deleteFile(dir, true);
    }
}
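Beyond MatchAllDocsQuery, the borrowed searcher accepts any Lucene query. A minimal sketch, assuming the same test setup as above, that looks up one of the documents by the id field indexed with StringField:

final EventIndexSearcher searcher2 = mgr.borrowIndexSearcher(dir);
try {
    // StringField indexes the exact token, so a TermQuery on "1" matches doc1.
    final TopDocs hits = searcher2.getIndexSearcher().search(new TermQuery(new Term("id", "1")), 1);
    assertEquals(1, hits.totalHits);
} finally {
    mgr.returnIndexSearcher(searcher2);
}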
Use of org.apache.nifi.provenance.index.EventIndexWriter in the Apache NiFi project.
From the class TestSimpleIndexManager, method testWriterCloseIfOnlyUser:
@Test
public void testWriterCloseIfOnlyUser() throws IOException {
    final AtomicInteger closeCount = new AtomicInteger(0);

    final SimpleIndexManager mgr = new SimpleIndexManager(new RepositoryConfiguration()) {
        @Override
        protected void close(IndexWriterCount count) throws IOException {
            closeCount.incrementAndGet();
        }
    };

    final File dir = new File("target/" + UUID.randomUUID().toString());
    final EventIndexWriter writer = mgr.borrowIndexWriter(dir);
    mgr.returnIndexWriter(writer, true, true);
    assertEquals(1, closeCount.get());
}
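The counterpart behavior, sketched under the same assumptions (the overridden close hook above, and the manager's per-directory reference counting): when the writer has more than one borrower, a return does not close it until the last borrower returns it.

final EventIndexWriter w1 = mgr.borrowIndexWriter(dir);
final EventIndexWriter w2 = mgr.borrowIndexWriter(dir); // same index directory; count is now 2

mgr.returnIndexWriter(w1, true, true);
assertEquals(0, closeCount.get()); // another borrower is still outstanding, so no close yet

mgr.returnIndexWriter(w2, true, true);
assertEquals(1, closeCount.get()); // the last return closes the writer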
Use of org.apache.nifi.provenance.index.EventIndexWriter in the Apache NiFi project.
From the class EventIndexTask, method reIndex:
/**
 * Re-indexes the given documents. Each IndexableDocument provided must have its index directory set.
 */
void reIndex(final List<IndexableDocument> toIndex, final CommitPreference commitPreference) throws IOException {
    if (toIndex.isEmpty()) {
        return;
    }

    final Map<File, List<IndexableDocument>> docsByIndexDir = toIndex.stream().collect(Collectors.groupingBy(doc -> doc.getIndexDirectory()));

    for (final Map.Entry<File, List<IndexableDocument>> entry : docsByIndexDir.entrySet()) {
        final File indexDirectory = entry.getKey();
        final List<IndexableDocument> documentsForIndex = entry.getValue();

        final EventIndexWriter indexWriter = indexManager.borrowIndexWriter(indexDirectory);
        try {
            // Remove any overlapping documents that already exist in this index. The id range is
            // computed over this index's documents only, so that events belonging to other
            // indices are not deleted.
            long minId = Long.MAX_VALUE;
            long maxId = Long.MIN_VALUE;
            for (final IndexableDocument doc : documentsForIndex) {
                final long eventId = doc.getDocument().getField(SearchableFields.Identifier.getSearchableFieldName()).numericValue().longValue();
                if (eventId < minId) {
                    minId = eventId;
                }
                if (eventId > maxId) {
                    maxId = eventId;
                }
            }

            final NumericRangeQuery<Long> query = NumericRangeQuery.newLongRange(SearchableFields.Identifier.getSearchableFieldName(), minId, maxId, true, true);
            indexWriter.getIndexWriter().deleteDocuments(query);

            final List<Document> documents = documentsForIndex.stream().map(doc -> doc.getDocument()).collect(Collectors.toList());
            indexWriter.index(documents, commitThreshold);
        } finally {
            indexManager.returnIndexWriter(indexWriter, CommitPreference.FORCE_COMMIT.equals(commitPreference), false);
        }
    }
}
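Note that the NumericRangeQuery deletion above can only match documents whose identifier was indexed as a numeric field. A brief sketch of what the indexing side must look like for this to work (Lucene 4.x API; the field construction here is an assumption for illustration, not copied from the NiFi source):

// The event id must be indexed numerically (e.g. as a LongField) so that
// doc.getField(...).numericValue() above returns a value and the
// NumericRangeQuery delete can match the document.
final Document document = new Document();
document.add(new LongField(SearchableFields.Identifier.getSearchableFieldName(), eventId, Store.YES));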
Use of org.apache.nifi.provenance.index.EventIndexWriter in the Apache NiFi project.
From the class CachingIndexManager, method borrowIndexSearcher:
@Override
public EventIndexSearcher borrowIndexSearcher(final File indexDir) throws IOException {
    final File absoluteFile = indexDir.getAbsoluteFile();
    logger.trace("Borrowing index searcher for {}", indexDir);

    lock.lock();
    try {
        // check if we already have a reader cached.
        List<ActiveIndexSearcher> currentlyCached = activeSearchers.get(absoluteFile);
        if (currentlyCached == null) {
            currentlyCached = new ArrayList<>();
            activeSearchers.put(absoluteFile, currentlyCached);
        } else {
            // Look for a usable cached searcher; poisoned or already-closed searchers are
            // skipped here and will be removed from our cache later.
            for (final ActiveIndexSearcher searcher : currentlyCached) {
                if (searcher.isCache()) {
                    // if the searcher is poisoned, skip it so that it is not handed out again.
                    if (searcher.isPoisoned()) {
                        continue;
                    }

                    // if there are no references to the reader, it will have been closed. Since there is no
                    // isClosed() method, this is how we determine whether it's been closed or not.
                    final int refCount = searcher.getSearcher().getIndexSearcher().getIndexReader().getRefCount();
                    if (refCount <= 0) {
                        // if refCount == 0, then the reader has been closed, so we cannot use the searcher
                        logger.debug("Reference count for cached Index Searcher for {} is currently {}; removing cached searcher", absoluteFile, refCount);
                        continue;
                    }

                    final int referenceCount = searcher.incrementReferenceCount();
                    logger.debug("Providing previously cached index searcher for {} and incrementing Reference Count to {}", indexDir, referenceCount);
                    return searcher.getSearcher();
                }
            }
        }

        // We found no cached Index Readers. Create a new one. To do this, we need to check
        // if we have an Index Writer, and if so create a Reader based on the Index Writer.
        // This will provide us a 'near real time' index reader.
        final IndexWriterCount writerCount = writerCounts.remove(absoluteFile);
        if (writerCount == null) {
            final Directory directory = FSDirectory.open(absoluteFile);
            logger.debug("No Index Writer currently exists for {}; creating a cacheable reader", indexDir);

            try {
                final DirectoryReader directoryReader = DirectoryReader.open(directory);
                final IndexSearcher searcher = new IndexSearcher(directoryReader);
                final EventIndexSearcher eventIndexSearcher = new LuceneEventIndexSearcher(searcher, indexDir, directory, directoryReader);

                // we want to cache the searcher that we create, since it's just a reader.
                final ActiveIndexSearcher cached = new ActiveIndexSearcher(eventIndexSearcher, absoluteFile, directoryReader, directory, true);
                currentlyCached.add(cached);

                return cached.getSearcher();
            } catch (final IOException e) {
                logger.error("Failed to create Index Searcher for {} due to {}", absoluteFile, e.toString());
                logger.error("", e);

                try {
                    directory.close();
                } catch (final IOException ioe) {
                    e.addSuppressed(ioe);
                }

                throw e;
            }
        } else {
            logger.debug("Index Writer currently exists for {}; creating a non-cacheable reader and incrementing counter to {}", indexDir, writerCount.getCount() + 1);

            // increment the writer count to ensure that it's kept open.
            writerCounts.put(absoluteFile, new IndexWriterCount(writerCount.getWriter(), writerCount.getAnalyzer(), writerCount.getDirectory(), writerCount.getCount() + 1));

            // create a new Index Searcher from the writer so that we don't have an issue with trying
            // to read from a directory that's locked. If we get the "no segments* file found" error with
            // Lucene, this indicates that an IndexWriter already has the directory open.
            final EventIndexWriter writer = writerCount.getWriter();
            final DirectoryReader directoryReader = DirectoryReader.open(writer.getIndexWriter(), false);
            final IndexSearcher searcher = new IndexSearcher(directoryReader);
            final EventIndexSearcher eventIndexSearcher = new LuceneEventIndexSearcher(searcher, indexDir, null, directoryReader);

            // we don't want to cache this searcher because it's based on a writer, so we want to get
            // new values the next time that we search.
            final ActiveIndexSearcher activeSearcher = new ActiveIndexSearcher(eventIndexSearcher, absoluteFile, directoryReader, null, false);
            currentlyCached.add(activeSearcher);

            return activeSearcher.getSearcher();
        }
    } finally {
        lock.unlock();
    }
}
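From the caller's side, the contract of this method pairs each borrow with a return. A hedged sketch; the query, result handling, and variable names are placeholders rather than code from the NiFi source:

final EventIndexSearcher eventSearcher = indexManager.borrowIndexSearcher(indexDirectory);
try {
    // Any Lucene query works here; MatchAllDocsQuery is just a placeholder.
    final TopDocs hits = eventSearcher.getIndexSearcher().search(new MatchAllDocsQuery(), 50);
    // ... process hits.scoreDocs ...
} finally {
    // Decrements the searcher's reference count; cached searchers stay open for reuse,
    // while non-cached (writer-backed) searchers are closed when no references remain.
    indexManager.returnIndexSearcher(eventSearcher);
}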