Use of org.apache.nifi.provenance.ProvenanceEventRecord in the Apache NiFi project.
Class TestWriteAheadStorePartition, method testReindex.
/**
 * Writes 100 events to a {@link WriteAheadStorePartition}, then asks it to re-index against a mocked
 * {@link EventIndex} that reports 18 as the minimum event id to re-index. The test passes when exactly
 * 82 events are handed to the index and their ids are the consecutive values 18 through 99.
 */
@Test
@SuppressWarnings("unchecked")
public void testReindex() throws IOException {
    final RepositoryConfiguration config = createConfig(1, "testReindex");
    config.setMaxEventFileCount(5);

    // Use the first configured storage directory and its name for the partition under test.
    final String partitionName = config.getStorageDirectories().keySet().iterator().next();
    final File storageDir = config.getStorageDirectories().values().iterator().next();

    final RecordWriterFactory writerFactory = (file, idGenerator, compressed, createToc) -> {
        final TocWriter tocWriter;
        if (createToc) {
            tocWriter = new StandardTocWriter(TocUtil.getTocFile(file), false, false);
        } else {
            tocWriter = null;
        }
        return new EventIdFirstSchemaRecordWriter(file, idGenerator, tocWriter, compressed, 32 * 1024, IdentifierLookup.EMPTY);
    };
    final RecordReaderFactory readerFactory = RecordReaders::newRecordReader;

    final WriteAheadStorePartition partition = new WriteAheadStorePartition(storageDir, partitionName, config, writerFactory, readerFactory,
        new LinkedBlockingQueue<>(), new AtomicLong(0L), EventReporter.NO_OP);

    // Store the events one at a time.
    for (int written = 0; written < 100; written++) {
        partition.addEvents(Collections.singleton(TestUtil.createEvent()));
    }

    // The mocked index simply records every event it is asked to re-index.
    final Map<ProvenanceEventRecord, StorageSummary> capturedEvents = new ConcurrentHashMap<>();
    final EventIndex index = Mockito.mock(EventIndex.class);
    Mockito.doAnswer(new Answer<Object>() {
        @Override
        public Object answer(final InvocationOnMock invocation) throws Throwable {
            final Map<ProvenanceEventRecord, StorageSummary> batch = invocation.getArgumentAt(0, Map.class);
            capturedEvents.putAll(batch);
            return null;
        }
    }).when(index).reindexEvents(Mockito.anyMap());
    Mockito.doReturn(18L).when(index).getMinimumEventIdToReindex("1");

    partition.reindexLatestEvents(index);

    final List<Long> sortedIds = capturedEvents.values().stream()
        .map(summary -> summary.getEventId())
        .sorted()
        .collect(Collectors.toList());

    assertEquals(82, sortedIds.size());

    // Ids must be contiguous, starting at the minimum id reported by the index.
    int expectedId = 18;
    for (final Long reindexedId : sortedIds) {
        assertEquals(expectedId, reindexedId.intValue());
        expectedId++;
    }
}
Use of org.apache.nifi.provenance.ProvenanceEventRecord in the Apache NiFi project.
Class TestSelectiveRecordReaderEventIterator, method time.
/**
 * Invokes the given task and verifies that it produced a non-null event with the expected id.
 *
 * @param task the task that yields the event to check
 * @param id the event id the returned event must carry
 * @throws Exception if the task itself fails
 */
private void time(final Callable<ProvenanceEventRecord> task, final long id) throws Exception {
    final long startNanos = System.nanoTime();

    final ProvenanceEventRecord result = task.call();
    Assert.assertNotNull(result);
    Assert.assertEquals(id, result.getEventId());

    // The elapsed time spans the call and the assertions above. It is currently not reported anywhere;
    // print it here when timing output is needed for debugging.
    final long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
}
Use of org.apache.nifi.provenance.ProvenanceEventRecord in the Apache NiFi project.
Class QueryTask, method readDocuments.
/**
 * Resolves the hits in {@code topDocs} to provenance events.
 * <p>
 * Each scored document is loaded from the index reader, its event identifier field is extracted, and the
 * resulting ids are then fetched from the event store.
 *
 * @param topDocs the search hits; may be {@code null}
 * @param indexReader the reader used to load the matching documents
 * @return a tuple of the fetched events and the total hit count; an empty list and 0 when there are no hits
 * @throws SearchFailedException if a document cannot be loaded or the event store cannot be read
 */
private Tuple<List<ProvenanceEventRecord>, Integer> readDocuments(final TopDocs topDocs, final IndexReader indexReader) {
    // Without any hits there is nothing to load: report no records and a hit count of 0.
    final boolean nothingToRead = topDocs == null || topDocs.totalHits == 0;
    if (nothingToRead) {
        return new Tuple<>(Collections.<ProvenanceEventRecord>emptyList(), 0);
    }

    final long convertStartNanos = System.nanoTime();
    final List<Long> eventIds = Arrays.stream(topDocs.scoreDocs)
        .map(scoreDoc -> {
            // Read the document number before the try so only index-read failures are wrapped.
            final int docId = scoreDoc.doc;
            try {
                return indexReader.document(docId, LUCENE_FIELDS_TO_LOAD);
            } catch (final Exception e) {
                throw new SearchFailedException("Failed to read Provenance Events from Event File", e);
            }
        })
        .map(doc -> doc.getField(SearchableFields.Identifier.getSearchableFieldName()).numericValue().longValue())
        .collect(Collectors.toList());
    final long convertEndNanos = System.nanoTime();

    logger.debug("Converting documents took {} ms", TimeUnit.NANOSECONDS.toMillis(convertEndNanos - convertStartNanos));

    final List<ProvenanceEventRecord> events;
    try {
        events = eventStore.getEvents(eventIds, authorizer, transformer);
    } catch (IOException e) {
        throw new SearchFailedException("Unable to retrieve events from the Provenance Store", e);
    }

    final long fetchMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - convertEndNanos);
    logger.debug("Fetching {} events from Event Store took {} ms ({} events actually fetched)", eventIds.size(), fetchMillis, events.size());

    return new Tuple<>(events, topDocs.totalHits);
}
Use of org.apache.nifi.provenance.ProvenanceEventRecord in the Apache NiFi project.
Class DocsReader, method read.
/**
 * Reads the provenance events referenced by the given Lucene documents from their provenance log files.
 * <p>
 * Documents are grouped by storage file name and each group is read with a single record reader. A group
 * whose log file cannot be located is skipped with a warning, and a failure while reading one file is
 * logged and does not abort the remaining files. Only non-null events that the authorizer accepts are
 * returned.
 *
 * @param docs the documents identifying the events to read
 * @param authorizer decides which events may be returned
 * @param allProvenanceLogFiles the known provenance log files, used to locate each storage file
 * @param retrievalCount shared counter that is incremented for every attempted read; reading stops for a
 *        file once the counter has reached {@code maxResults}
 * @param maxResults the maximum number of reads to attempt across callers sharing {@code retrievalCount}
 * @param maxAttributeChars passed through to the record reader
 * @return the authorized events in the order they were read; empty if the limit was already reached
 * @throws IOException declared for callers; read failures for individual files are logged, not propagated
 */
public Set<ProvenanceEventRecord> read(final List<Document> docs, final EventAuthorizer authorizer, final Collection<Path> allProvenanceLogFiles, final AtomicInteger retrievalCount, final int maxResults, final int maxAttributeChars) throws IOException {
    if (retrievalCount.get() >= maxResults) {
        return Collections.emptySet();
    }

    final long start = System.nanoTime();
    final Set<ProvenanceEventRecord> matchingRecords = new LinkedHashSet<>();
    final Map<String, List<Document>> byStorageNameDocGroups = LuceneUtil.groupDocsByStorageFileName(docs);

    int logFileCount = 0;
    for (final Map.Entry<String, List<Document>> docGroup : byStorageNameDocGroups.entrySet()) {
        final String storageFileName = docGroup.getKey();
        final File provenanceEventFile = LuceneUtil.getProvenanceLogFile(storageFileName, allProvenanceLogFiles);
        if (provenanceEventFile == null) {
            logger.warn("Could not find Provenance Log File with " + "basename {} in the Provenance Repository; assuming " + "file has expired and continuing without it", storageFileName);
            continue;
        }

        // Counted per file so the debug line below reports this file only, not a running total.
        int eventsReadThisFile = 0;
        try (final RecordReader reader = RecordReaders.newRecordReader(provenanceEventFile, allProvenanceLogFiles, maxAttributeChars)) {
            // The file was opened successfully, so it counts toward the number of log files read.
            logFileCount++;

            final Iterator<Document> docIter = docGroup.getValue().iterator();
            while (docIter.hasNext() && retrievalCount.getAndIncrement() < maxResults) {
                final ProvenanceEventRecord event = getRecord(docIter.next(), reader);
                if (event != null && authorizer.isAuthorized(event)) {
                    matchingRecords.add(event);
                    eventsReadThisFile++;
                }
            }
        } catch (final Exception e) {
            logger.warn("Failed to read Provenance Events. The event file '" + provenanceEventFile.getAbsolutePath() + "' may be missing or corrupt.", e);
        }
        logger.debug("Read {} records from previous file", eventsReadThisFile);
    }

    final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
    logger.debug("Took {} ms to read {} events from {} prov log files", millis, matchingRecords.size(), logFileCount);
    return matchingRecords;
}
Use of org.apache.nifi.provenance.ProvenanceEventRecord in the Apache NiFi project.
Class IndexSearch, method search.
/**
 * Runs the given provenance query against this index directory and returns the matching events.
 * <p>
 * When {@code retrievedCount} has already reached the query's maximum number of results, the index is not
 * searched and an empty result is returned. Otherwise the query's start date is raised to
 * {@code firstEventTimestamp} if it is unset or earlier, and an unset end date is set to the current time;
 * note that both adjustments modify the supplied query object.
 *
 * @param provenanceQuery the query to execute; its start and end dates may be updated by this call
 * @param user the user passed to the repository for every authorization decision
 * @param retrievedCount shared count of results retrieved so far, also handed to the document reader
 * @param firstEventTimestamp the earliest timestamp the search is allowed to start from
 * @return the query result, updated with the matching records and total hit count (empty on no hits or
 *         when the index files cannot be found)
 * @throws IOException if the index directory cannot be created or is not a directory, or if searching or
 *         reading fails with an I/O error other than a missing file
 */
public StandardQueryResult search(final org.apache.nifi.provenance.search.Query provenanceQuery, final NiFiUser user, final AtomicInteger retrievedCount, final long firstEventTimestamp) throws IOException {
    // Enough results have been collected already: answer with an empty result instead of searching.
    if (retrievedCount.get() >= provenanceQuery.getMaxResults()) {
        final StandardQueryResult skippedResult = new StandardQueryResult(provenanceQuery, 1);
        skippedResult.update(Collections.<ProvenanceEventRecord>emptyList(), 0L);
        logger.info("Skipping search of Provenance Index {} for {} because the max number of results ({}) has already been retrieved", indexDirectory, provenanceQuery, provenanceQuery.getMaxResults());
        return skippedResult;
    }

    final long overallStartNanos = System.nanoTime();

    final boolean directoryAvailable = indexDirectory.exists() || indexDirectory.mkdirs();
    if (!directoryAvailable) {
        throw new IOException("Unable to create Indexing Directory " + indexDirectory);
    }
    if (!indexDirectory.isDirectory()) {
        throw new IOException("Indexing Directory specified is " + indexDirectory + ", but this is not a directory");
    }

    final StandardQueryResult queryResult = new StandardQueryResult(provenanceQuery, 1);

    // Never search from before the first event timestamp: events older than that should not count
    // toward the total number of matches.
    if (provenanceQuery.getStartDate() == null || provenanceQuery.getStartDate().getTime() < firstEventTimestamp) {
        provenanceQuery.setStartDate(new Date(firstEventTimestamp));
    }
    if (provenanceQuery.getEndDate() == null) {
        provenanceQuery.setEndDate(new Date());
    }

    final Query luceneQuery = LuceneUtil.convertQuery(provenanceQuery);

    final long borrowStartNanos = System.nanoTime();
    EventIndexSearcher borrowedSearcher = null;
    try {
        borrowedSearcher = indexManager.borrowIndexSearcher(indexDirectory);
        final long searchStartNanos = System.nanoTime();
        final long borrowNanos = searchStartNanos - borrowStartNanos;

        logger.debug("Searching {} for {}", this, provenanceQuery);
        final TopDocs hits = borrowedSearcher.getIndexSearcher().search(luceneQuery, provenanceQuery.getMaxResults());
        final long searchEndNanos = System.nanoTime();

        logger.debug("Searching {} for {} took {} millis; opening searcher took {} millis", this, provenanceQuery,
            TimeUnit.NANOSECONDS.toMillis(searchEndNanos - searchStartNanos), TimeUnit.NANOSECONDS.toMillis(borrowNanos));

        if (hits.totalHits == 0) {
            queryResult.update(Collections.<ProvenanceEventRecord>emptyList(), 0);
            return queryResult;
        }

        final DocsReader docsReader = new DocsReader();

        // Every authorization decision is delegated to the repository on behalf of the requesting user.
        final EventAuthorizer authorizer = new EventAuthorizer() {
            @Override
            public boolean isAuthorized(ProvenanceEventRecord record) {
                return repository.isAuthorized(record, user);
            }

            @Override
            public void authorize(ProvenanceEventRecord record) throws AccessDeniedException {
                repository.authorize(record, user);
            }

            @Override
            public List<ProvenanceEventRecord> filterUnauthorizedEvents(List<ProvenanceEventRecord> records) {
                return repository.filterUnauthorizedEvents(records, user);
            }

            @Override
            public Set<ProvenanceEventRecord> replaceUnauthorizedWithPlaceholders(Set<ProvenanceEventRecord> records) {
                return repository.replaceUnauthorizedWithPlaceholders(records, user);
            }
        };

        final Set<ProvenanceEventRecord> matchingRecords = docsReader.read(hits, authorizer, borrowedSearcher.getIndexSearcher().getIndexReader(),
            repository.getAllLogFiles(), retrievedCount, provenanceQuery.getMaxResults(), maxAttributeChars);

        final long readNanos = System.nanoTime() - searchEndNanos;
        logger.debug("Reading {} records took {} millis for {}", matchingRecords.size(), TimeUnit.NANOSECONDS.toMillis(readNanos), this);

        queryResult.update(matchingRecords, hits.totalHits);

        final long totalNanos = System.nanoTime() - overallStartNanos;
        logger.info("Successfully executed {} against Index {}; Search took {} milliseconds; Total Hits = {}", provenanceQuery, indexDirectory, TimeUnit.NANOSECONDS.toMillis(totalNanos), hits.totalHits);

        return queryResult;
    } catch (final FileNotFoundException e) {
        // Either nothing has been indexed yet or the data has already aged off; treat it as no matches.
        logger.warn("Attempted to search Provenance Index {} but could not find the file due to {}", indexDirectory, e);
        if (logger.isDebugEnabled()) {
            logger.warn("", e);
        }

        queryResult.update(Collections.<ProvenanceEventRecord>emptyList(), 0);
        return queryResult;
    } finally {
        // Always hand a borrowed searcher back to the index manager.
        if (borrowedSearcher != null) {
            indexManager.returnIndexSearcher(borrowedSearcher);
        }
    }
}
Aggregations