use of org.apache.nifi.provenance.lucene.IndexManager in project nifi by apache.
the class WriteAheadProvenanceRepository method init.
/**
 * Wires up the event store and the event index for this repository, initializes both,
 * and then asks the store to re-index its latest events into the index.
 *
 * <p>A failure while re-indexing is logged and otherwise ignored, so this method still
 * completes normally in that case.</p>
 *
 * @param recordWriterFactory factory handed to the event store
 * @param recordReaderFactory factory handed to the event store
 * @param eventReporter reporter handed to both the event store and the event index, and retained by this repository
 * @param authorizer authorizer retained by this repository
 * @param resourceFactory authorizable factory retained by this repository
 * @throws IOException declared by this method; presumably raised while initializing the store or the index
 */
synchronized void init(RecordWriterFactory recordWriterFactory, RecordReaderFactory recordReaderFactory, final EventReporter eventReporter, final Authorizer authorizer, final ProvenanceAuthorizableFactory resourceFactory) throws IOException {
    // The file manager and the index manager are only needed by the components built here,
    // so they are created in place rather than held in locals.
    eventStore = new PartitionedWriteAheadEventStore(
        config, recordWriterFactory, recordReaderFactory, eventReporter, new EventFileManager());
    eventIndex = new LuceneEventIndex(config, new SimpleIndexManager(config), eventReporter);

    this.eventReporter = eventReporter;
    this.authorizer = authorizer;
    this.resourceFactory = resourceFactory;

    // The store is initialized first; the index is then initialized against that store.
    eventStore.initialize();
    eventIndex.initialize(eventStore);

    try {
        eventStore.reindexLatestEvents(eventIndex);
    } catch (final Exception reindexFailure) {
        // Best effort: the repository stays usable even if the newest events could not be re-indexed.
        logger.error(
            "Failed to re-index some of the Provenance Events. It is possible that some of the latest "
                + "events will not be available from the Provenance Repository when a query is issued.",
            reindexFailure);
    }
}
use of org.apache.nifi.provenance.lucene.IndexManager in project nifi by apache.
the class EventIndexTask method index.
/**
 * Writes the given documents to the writable index directory chosen for the partition and
 * decides whether the index writer must be committed and/or closed afterwards.
 *
 * <p>Does nothing when {@code toIndex} is empty. Otherwise the index writer is borrowed from the
 * index manager and is always handed back in a {@code finally} block, together with the
 * commit/close requests computed along the way.</p>
 *
 * @param toIndex the stored documents to index
 * @param partitionName name of the partition, used to pick the writable directory and to look up the active one
 * @throws IOException declared by this method; presumably propagated from the index writer or index manager
 */
private void index(final List<StoredDocument> toIndex, final String partitionName) throws IOException {
if (toIndex.isEmpty()) {
return;
}
// Unwrap each StoredDocument into its Document so that we can pass the list to the Index Writer.
final List<Document> documents = toIndex.stream().map(doc -> doc.getDocument()).collect(Collectors.toList());
// Requests passed to the index manager when the writer is returned in the finally block below.
boolean requestClose = false;
boolean requestCommit = false;
// Smallest EventTime field value among the documents. getAsLong() is safe here because
// toIndex was checked to be non-empty above.
final long minEventTime = toIndex.stream().mapToLong(doc -> doc.getDocument().getField(SearchableFields.EventTime.getSearchableFieldName()).numericValue().longValue()).min().getAsLong();
// Synchronize on the directory manager because we don't want the active directory to change
// while we are obtaining an index writer for it. I.e., determining the active directory
// and obtaining an Index Writer for it need to be done atomically.
final EventIndexWriter indexWriter;
final File indexDirectory;
synchronized (directoryManager) {
indexDirectory = directoryManager.getWritableIndexingDirectory(minEventTime, partitionName);
indexWriter = indexManager.borrowIndexWriter(indexDirectory);
}
try {
// Perform the actual indexing. The returned flag tells us whether the writer wants a commit.
boolean writerIndicatesCommit = indexWriter.index(documents, commitThreshold);
// If we don't need to commit index based on what index writer tells us, we will still want
// to commit the index if it's assigned to a partition and this is no longer the active index
// for that partition. This prevents the following case:
//
// Thread T1: pulls events from queue
// Maps events to Index Directory D1
// Thread T2: pulls events from queue
// Maps events to Index Directory D1, the active index for Partition P1.
// Writes events to D1.
// Commits Index Writer for D1.
// Closes Index Writer for D1.
// Thread T1: Writes events to D1.
// Determines that Index Writer for D1 does not need to be committed or closed.
//
// In the case outlined above, we would potentially lose those events from the index! To avoid this,
// we simply decide to commit the index if this writer is no longer the active writer for the index.
// However, if we have 10 threads, we don't want all 10 threads trying to commit the index after each
// update. We want to commit when they've all finished. This is what the IndexManager will do if we request
// that it commit the index. It will also close the index if requested, once all writers have finished.
// So when this is the case, we will request that the Index Manager both commit and close the writer.
final Optional<File> activeIndexDirOption = directoryManager.getActiveIndexDirectory(partitionName);
// No active directory for the partition, or a different one than we wrote to: ask for commit and close.
if (!activeIndexDirOption.isPresent() || !activeIndexDirOption.get().equals(indexDirectory)) {
requestCommit = true;
requestClose = true;
}
if (writerIndicatesCommit) {
commit(indexWriter);
// we've already committed the index writer so no need to request that the index manager do so also.
requestCommit = false;
// Tell the directory manager about the commit; it may in turn ask for the writer to be closed.
final boolean directoryManagerIndicatesClose = directoryManager.onIndexCommitted(indexDirectory);
requestClose = requestClose || directoryManagerIndicatesClose;
if (logger.isDebugEnabled()) {
// Largest Identifier field value among the documents, or -1 if none is available.
final long maxId = documents.stream().mapToLong(doc -> doc.getField(SearchableFields.Identifier.getSearchableFieldName()).numericValue().longValue()).max().orElse(-1L);
logger.debug("Committed index {} after writing a max Event ID of {}", indexDirectory, maxId);
}
}
} finally {
// Always hand the borrowed writer back, even if indexing or committing threw.
indexManager.returnIndexWriter(indexWriter, requestCommit, requestClose);
}
}
use of org.apache.nifi.provenance.lucene.IndexManager in project nifi by apache.
the class TestLuceneEventIndex method testGetMinimumIdToReindex.
/**
 * Adds 50,000 events to the index, waits until the index reports a max event id of at least
 * 40,000 for "1", and then checks that the minimum event id to re-index for "1" is at least 30,000.
 * Skipped on Windows environments.
 */
@Test(timeout = 60000)
public void testGetMinimumIdToReindex() throws InterruptedException {
    assumeFalse(isWindowsEnvironment());

    final RepositoryConfiguration config = createConfig(1);
    config.setDesiredIndexSize(1L);

    final IndexManager manager = new SimpleIndexManager(config);
    final ArrayListEventStore store = new ArrayListEventStore();
    final LuceneEventIndex eventIndex = new LuceneEventIndex(config, manager, 20_000, EventReporter.NO_OP);
    eventIndex.initialize(store);

    // Store each event and hand its storage locations straight to the index.
    for (int added = 0; added < 50_000; added++) {
        final StorageResult stored = store.addEvent(createEvent("1234"));
        eventIndex.addEvents(stored.getStorageLocations());
    }

    // Poll until enough events have been indexed; the test timeout bounds this wait.
    long maxIndexedId = eventIndex.getMaxEventId("1");
    while (maxIndexedId < 40_000L) {
        Thread.sleep(25);
        maxIndexedId = eventIndex.getMaxEventId("1");
    }

    final long minimumIdToReindex = eventIndex.getMinimumEventIdToReindex("1");
    assertTrue(minimumIdToReindex >= 30000L);
}
use of org.apache.nifi.provenance.lucene.IndexManager in project nifi by apache.
the class TestLuceneEventIndex method addThenQueryWithEmptyQuery.
/**
 * Adds a single event to the index and repeatedly submits a query with no search terms
 * until the event is returned, then checks that exactly that event came back.
 * Skipped on Windows environments.
 */
@Test(timeout = 60000)
public void addThenQueryWithEmptyQuery() throws InterruptedException {
    assumeFalse(isWindowsEnvironment());

    final RepositoryConfiguration config = createConfig();
    final IndexManager manager = new SimpleIndexManager(config);
    final LuceneEventIndex eventIndex = new LuceneEventIndex(config, manager, 1, EventReporter.NO_OP);

    final ProvenanceEventRecord expected = createEvent();
    eventIndex.addEvent(expected, new StorageSummary(expected.getEventId(), "1.prov", "1", 1, 2L, 2L));

    final Query emptyQuery = new Query(UUID.randomUUID().toString());

    final ArrayListEventStore store = new ArrayListEventStore();
    store.addEvent(expected);
    eventIndex.initialize(store);

    // Indexing takes an unknown amount of time, so keep querying until a result shows up.
    // The wait is bounded by the timeout declared on the @Test annotation.
    List<ProvenanceEventRecord> found;
    do {
        final QuerySubmission submission = eventIndex.submitQuery(emptyQuery, EventAuthorizer.GRANT_ALL, "unit test user");
        assertNotNull(submission);

        final QueryResult outcome = submission.getResult();
        assertNotNull(outcome);
        outcome.awaitCompletion(100, TimeUnit.MILLISECONDS);
        assertTrue(outcome.isFinished());
        assertNull(outcome.getError());

        found = outcome.getMatchingEvents();
        assertNotNull(found);

        // Pause between attempts so the loop does not monopolize a CPU core.
        Thread.sleep(100L);
    } while (found.isEmpty());

    assertEquals(1, found.size());
    assertEquals(expected, found.get(0));
}
use of org.apache.nifi.provenance.lucene.IndexManager in project nifi by apache.
the class TestLuceneEventIndex method testExpiration.
/**
 * Indexes one event, waits for an index directory to appear, removes the event from the backing
 * list, runs index maintenance, and then checks that exactly one index directory is reported.
 *
 * <p>The mocked event store answers every {@code getEvents} call with the first entry of the
 * backing list and asserts that it was asked for event id 0 with a limit of 1.</p>
 */
@Test(timeout = 60000)
public void testExpiration() throws InterruptedException, IOException {
    final RepositoryConfiguration repoConfig = createConfig(1);
    repoConfig.setDesiredIndexSize(1L);
    final IndexManager indexManager = new SimpleIndexManager(repoConfig);
    final LuceneEventIndex index = new LuceneEventIndex(repoConfig, indexManager, 1, EventReporter.NO_OP);

    final List<ProvenanceEventRecord> events = new ArrayList<>();
    events.add(createEvent(500000L));
    events.add(createEvent());

    final EventStore eventStore = Mockito.mock(EventStore.class);
    Mockito.doAnswer(new Answer<List<ProvenanceEventRecord>>() {
        @Override
        public List<ProvenanceEventRecord> answer(final InvocationOnMock invocation) throws Throwable {
            final Long eventId = invocation.getArgumentAt(0, Long.class);
            assertEquals(0, eventId.longValue());
            assertEquals(1, invocation.getArgumentAt(1, Integer.class).intValue());
            return Collections.singletonList(events.get(0));
        }
    }).when(eventStore).getEvents(Mockito.anyLong(), Mockito.anyInt());

    index.initialize(eventStore);

    // Add the first event to the index and wait for it to be indexed, since indexing is asynchronous.
    index.addEvent(events.get(0), createStorageSummary(events.get(0).getEventId()));

    // Poll for the index directory, sleeping briefly between attempts instead of spinning on the
    // CPU. The wait is bounded by the timeout declared on the @Test annotation.
    List<File> allDirectories = index.getDirectoryManager().getDirectories(null, null);
    while (allDirectories.isEmpty()) {
        Thread.sleep(25L);
        allDirectories = index.getDirectoryManager().getDirectories(null, null);
    }

    // Remove the first event from the store
    events.remove(0);
    index.performMaintenance();

    assertEquals(1, index.getDirectoryManager().getDirectories(null, null).size());
}
Aggregations