use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.
the class TestLuceneEventIndex method addThenQueryWithEmptyQuery.
/**
 * Adds one event to a {@link LuceneEventIndex} and checks that a query carrying no
 * search terms eventually returns exactly that event.
 * The test is skipped when {@code isWindowsEnvironment()} reports true.
 */
@Test(timeout = 60000)
public void addThenQueryWithEmptyQuery() throws InterruptedException {
    assumeFalse(isWindowsEnvironment());

    final RepositoryConfiguration config = createConfig();
    final IndexManager manager = new SimpleIndexManager(config);
    final LuceneEventIndex index = new LuceneEventIndex(config, manager, 1, EventReporter.NO_OP);

    final ProvenanceEventRecord event = createEvent();
    final StorageSummary summary = new StorageSummary(event.getEventId(), "1.prov", "1", 1, 2L, 2L);
    index.addEvent(event, summary);

    // No search terms are added, so the query is "empty".
    final Query query = new Query(UUID.randomUUID().toString());

    final ArrayListEventStore store = new ArrayListEventStore();
    store.addEvent(event);
    index.initialize(store);

    // Indexing finishes at an unknown time, so poll until the query yields a hit.
    // The only upper bound on this loop is the @Test timeout declared above (60000 ms).
    List<ProvenanceEventRecord> hits;
    do {
        final QuerySubmission submission = index.submitQuery(query, EventAuthorizer.GRANT_ALL, "unit test user");
        assertNotNull(submission);

        final QueryResult result = submission.getResult();
        assertNotNull(result);

        result.awaitCompletion(100, TimeUnit.MILLISECONDS);
        assertTrue(result.isFinished());
        assertNull(result.getError());

        hits = result.getMatchingEvents();
        assertNotNull(hits);

        // Pause between polls so the loop does not spin the CPU.
        Thread.sleep(100L);
    } while (hits.isEmpty());

    assertEquals(1, hits.size());
    assertEquals(event, hits.get(0));
}
use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.
the class TestLuceneEventIndex method testQuerySpecificField.
/**
 * Indexes two events and verifies that a query restricted to the first event's
 * FlowFile UUID returns only that first event.
 */
@Test(timeout = 50000)
public void testQuerySpecificField() throws InterruptedException {
    final RepositoryConfiguration config = createConfig();
    final IndexManager manager = new SimpleIndexManager(config);
    final LuceneEventIndex index = new LuceneEventIndex(config, manager, 2, EventReporter.NO_OP);

    // Two events go into the index; only the first is the target of the query.
    final ProvenanceEventRecord event = createEvent();
    final StorageSummary targetSummary = new StorageSummary(event.getEventId(), "1.prov", "1", 1, 2L, 2L);
    index.addEvent(event, targetSummary);

    final ProvenanceEventRecord otherEvent = createEvent();
    final StorageSummary otherSummary = new StorageSummary(2L, "1.prov", "1", 1, 2L, 2L);
    index.addEvent(otherEvent, otherSummary);

    // Restrict the query to the FlowFile UUID of the first event.
    final Query query = new Query(UUID.randomUUID().toString());
    query.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.FlowFileUUID, event.getFlowFileUuid()));

    final ArrayListEventStore store = new ArrayListEventStore();
    store.addEvent(event);
    index.initialize(store);

    // Indexing finishes at an unknown time, so poll until the query yields a hit.
    // The only upper bound on this loop is the @Test timeout declared above (50000 ms).
    List<ProvenanceEventRecord> hits;
    do {
        final QuerySubmission submission = index.submitQuery(query, EventAuthorizer.GRANT_ALL, "unit test user");
        assertNotNull(submission);

        final QueryResult result = submission.getResult();
        assertNotNull(result);

        result.awaitCompletion(100, TimeUnit.MILLISECONDS);
        assertTrue(result.isFinished());
        assertNull(result.getError());

        hits = result.getMatchingEvents();
        assertNotNull(hits);

        // Pause between polls so the loop does not spin the CPU.
        Thread.sleep(100L);
    } while (hits.isEmpty());

    assertEquals(1, hits.size());
    assertEquals(event, hits.get(0));
}
use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.
the class LuceneEventIndex method reindexEvents.
/**
 * Re-indexes the given events: every cached query is updated with each event, each event
 * is converted to a Lucene document, and the resulting documents are handed to an
 * {@link EventIndexTask} with {@code CommitPreference.PREVENT_COMMIT}.
 * <p>
 * Events for which the converter returns {@code null} are skipped (logged at debug level).
 * An {@link IOException} from the re-index call is logged and reported through the
 * event reporter rather than propagated.
 *
 * @param events the events to re-index, mapped to the summary describing where each is stored
 */
@Override
public void reindexEvents(final Map<ProvenanceEventRecord, StorageSummary> events) {
    final EventIndexTask indexTask = new EventIndexTask(documentQueue, config, indexManager, directoryManager, EventIndexTask.DEFAULT_MAX_EVENTS_PER_COMMIT, eventReporter);

    // One-entry cache of the index directory resolved for the previous event's timestamp.
    // -2L is the initial sentinel; presumably no real event carries that timestamp.
    File lastIndexDir = null;
    long lastEventTime = -2L;

    final List<IndexableDocument> indexableDocs = new ArrayList<>(events.size());
    for (final Map.Entry<ProvenanceEventRecord, StorageSummary> entry : events.entrySet()) {
        final ProvenanceEventRecord event = entry.getKey();
        final StorageSummary summary = entry.getValue();

        // Cached queries are updated for every event, even those that yield no document.
        for (final CachedQuery cachedQuery : cachedQueries) {
            cachedQuery.update(event, summary);
        }

        final Document document = eventConverter.convert(event, summary);
        if (document == null) {
            logger.debug("Received Provenance Event {} to index but it contained no information that should be indexed, so skipping it", event.getEventId());
            continue;
        }

        final long eventTime = event.getEventTime();
        final File indexDir;
        if (eventTime == lastEventTime) {
            // Same timestamp as the previously resolved event: reuse its directory.
            indexDir = lastIndexDir;
        } else {
            final List<File> files = getDirectoryManager().getDirectories(eventTime, null);
            indexDir = files.isEmpty() ? null : files.get(0);
            lastIndexDir = indexDir;
            // Remember the timestamp too; without this the cache above can never hit
            // and the directory lookup is repeated for every single event.
            lastEventTime = eventTime;
        }

        indexableDocs.add(new IndexableDocument(document, summary, indexDir));
    }

    try {
        indexTask.reIndex(indexableDocs, CommitPreference.PREVENT_COMMIT);
    } catch (final IOException ioe) {
        logger.error("Failed to reindex some Provenance Events", ioe);
        eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to re-index some Provenance Events. " + "Some Provenance Events may not be available for querying. See logs for more information.");
    }
}
use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.
the class WriteAheadStorePartition method addEvents.
/**
 * Writes the given events through a claimed record-writer lease and reports where each
 * event was stored and whether the write led to a rollover.
 *
 * @param events the events to persist
 * @return a result exposing the per-event storage summaries and, when a rollover was
 *         performed, the writer's record count
 * @throws IOException if this partition is closed or the events cannot be written
 */
@Override
public StorageResult addEvents(final Iterable<ProvenanceEventRecord> events) throws IOException {
    if (closed) {
        throw new IOException(this + " is closed");
    }

    // Keep requesting leases until one can be claimed. A lease that cannot be claimed
    // but reports that it should roll gets a rollover attempt before the next try.
    RecordWriterLease lease = getLease();
    while (!lease.tryClaim()) {
        if (lease.shouldRoll()) {
            tryRollover(lease);
        }
        lease = getLease();
    }

    // Write the events; the claim is always given back, even when the write fails.
    final RecordWriter writer = lease.getWriter();
    final Map<ProvenanceEventRecord, StorageSummary> storageMap;
    try {
        storageMap = addEvents(events, writer);
    } finally {
        lease.relinquishClaim();
    }

    // Attempt a rollover if the lease asks for one. A failure here is only logged,
    // since the events themselves have already been written.
    Integer rolled = null;
    if (lease.shouldRoll()) {
        try {
            if (tryRollover(lease)) {
                rolled = writer.getRecordsWritten();
            }
        } catch (final IOException ioe) {
            logger.error("Updated {} but failed to rollover to a new Event File", this, ioe);
        }
    }

    final Integer rolloverCount = rolled;
    return new StorageResult() {
        @Override
        public Map<ProvenanceEventRecord, StorageSummary> getStorageLocations() {
            return storageMap;
        }

        @Override
        public boolean triggeredRollover() {
            // A non-null count is set only when the rollover above succeeded.
            return rolloverCount != null;
        }

        @Override
        public Integer getEventsRolledOver() {
            return rolloverCount;
        }

        @Override
        public String toString() {
            return storageMap.toString();
        }
    };
}
use of org.apache.nifi.provenance.serialization.StorageSummary in project nifi by apache.
the class WriteAheadStorePartition method addEvents.
/**
 * Writes each event with the given writer and returns, per event, a storage summary that
 * carries this partition's name. If at least one event was written, the writer is flushed,
 * the partition's max event id is raised if needed, and the writer is synced when the
 * configuration asks for it.
 * <p>
 * On any exception the writer is marked dirty before the exception is rethrown.
 *
 * @param events the events to write
 * @param writer the writer to write them with
 * @return a map from each written event to its storage summary; empty if there were no events
 * @throws IOException if writing, flushing or syncing fails
 */
private Map<ProvenanceEventRecord, StorageSummary> addEvents(final Iterable<ProvenanceEventRecord> events, final RecordWriter writer) throws IOException {
    final Map<ProvenanceEventRecord, StorageSummary> locationMap = new HashMap<>();

    try {
        long lastIdWritten = -1L;
        for (final ProvenanceEventRecord record : events) {
            final StorageSummary written = writer.writeRecord(record);

            // Re-create the summary so that it names this partition.
            final StorageSummary located = new StorageSummary(
                written.getEventId(),
                written.getStorageLocation(),
                this.partitionName,
                written.getBlockIndex(),
                written.getSerializedLength(),
                written.getBytesWritten());

            locationMap.put(record, located);
            lastIdWritten = located.getEventId();
        }

        // Every iteration stores an entry, so an empty map means no event was supplied.
        if (locationMap.isEmpty()) {
            return locationMap;
        }

        writer.flush();

        // Raise the recorded max event id to the id just written, unless it is already higher.
        final long maxIdWritten = lastIdWritten;
        this.maxEventId.getAndUpdate(cur -> Math.max(cur, maxIdWritten));

        if (config.isAlwaysSync()) {
            writer.sync();
        }
    } catch (final Exception e) {
        // The dirty flag must be set before the lock for this journal is released.
        // Otherwise another thread could append to a journal that holds only part of
        // our record, and writing past that point would corrupt the journal.
        writer.markDirty();
        throw e;
    }

    return locationMap;
}
Aggregations