Use of org.apache.nifi.provenance.toc.StandardTocWriter in project nifi by apache.
In class TestEventIdFirstSchemaRecordReaderWriter, method testContentClaimRemoved:
@Test
public void testContentClaimRemoved() throws IOException {
    final File journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite.gz");
    final File tocFile = TocUtil.getTocFile(journalFile);
    final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);
    final RecordWriter writer = createWriter(journalFile, tocWriter, true, 8192);

    final Map<String, String> attributes = new HashMap<>();
    attributes.put("filename", "1.txt");
    attributes.put("uuid", UUID.randomUUID().toString());

    // Build a RECEIVE event that had a content claim previously but whose current claim has been removed.
    final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventTime(System.currentTimeMillis());
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    builder.fromFlowFile(TestUtil.createFlowFile(3L, 3000L, attributes));
    builder.setComponentId("1234");
    builder.setComponentType("dummy processor");
    builder.setPreviousContentClaim("container-1", "section-1", "identifier-1", 1L, 1L);
    builder.setCurrentContentClaim(null, null, null, 0L, 0L);
    final ProvenanceEventRecord record = builder.build();

    writer.writeHeader(1L);
    writer.writeRecord(record);
    writer.close();

    // Read the event back: the previous claim must survive while every current-claim field is null.
    final TocReader tocReader = new StandardTocReader(tocFile);
    try (final FileInputStream fis = new FileInputStream(journalFile);
         final RecordReader reader = createReader(fis, journalFile.getName(), tocReader, 2048)) {

        assertEquals(0, reader.getBlockIndex());
        reader.skipToBlock(0);
        final StandardProvenanceEventRecord recovered = reader.nextRecord();
        assertNotNull(recovered);
        assertEquals("nifi://unit-test", recovered.getTransitUri());

        assertEquals("container-1", recovered.getPreviousContentClaimContainer());
        assertNull(recovered.getContentClaimContainer());
        assertEquals("section-1", recovered.getPreviousContentClaimSection());
        assertNull(recovered.getContentClaimSection());
        assertEquals("identifier-1", recovered.getPreviousContentClaimIdentifier());
        assertNull(recovered.getContentClaimIdentifier());
        assertEquals(1L, recovered.getPreviousContentClaimOffset().longValue());
        assertNull(recovered.getContentClaimOffset());
        assertEquals(1L, recovered.getPreviousFileSize().longValue());
        assertEquals(0L, recovered.getFileSize());

        assertNull(reader.nextRecord());
    }
    FileUtils.deleteFile(journalFile.getParentFile(), true);
}
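The createWriter and createReader calls above are helpers defined by the test class itself and are not shown. For standalone reading, the later snippets use the RecordReaders factory instead; below is a minimal sketch of iterating a journal with that factory. Passing null for the provenance-log collection, the 65536 max-attribute-characters value, and the factory resolving the TOC file from the journal on its own are all assumptions, not guarantees from the source.

// Hedged sketch: iterate every event in the journal via the RecordReaders factory.
try (final RecordReader reader = RecordReaders.newRecordReader(journalFile, null, 65536)) {
    ProvenanceEventRecord event;
    while ((event = reader.nextRecord()) != null) {
        System.out.println(event.getEventId() + ": " + event.getEventType());
    }
}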
Use of org.apache.nifi.provenance.toc.StandardTocWriter in project nifi by apache.
In class TestSchemaRecordReaderWriter, method testFieldRemovedFromSchema:
@Test
public void testFieldRemovedFromSchema() throws IOException {
    final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);
    try {
        // Create a schema that has the fields modified
        final RecordSchema schemaV1 = ProvenanceEventSchema.PROVENANCE_EVENT_SCHEMA_V1;
        final List<RecordField> fields = new ArrayList<>(schemaV1.getFields());
        fields.remove(new SimpleRecordField(EventFieldNames.UPDATED_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        fields.remove(new SimpleRecordField(EventFieldNames.PREVIOUS_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        final RecordSchema recordSchema = new RecordSchema(fields);

        // Create a record writer whose schema does not contain updated attributes or previous attributes.
        // This means that we must also override the method that writes out attributes so that we are able
        // to avoid actually writing them out.
        final ByteArraySchemaRecordWriter writer = new ByteArraySchemaRecordWriter(journalFile, idGenerator, tocWriter, false, 0) {
            @Override
            public void writeHeader(final long firstEventId, final DataOutputStream out) throws IOException {
                final ByteArrayOutputStream baos = new ByteArrayOutputStream();
                recordSchema.writeTo(baos);
                out.writeInt(baos.size());
                baos.writeTo(out);
            }

            @Override
            protected Record createRecord(final ProvenanceEventRecord event, final long eventId) {
                final RecordSchema contentClaimSchema = new RecordSchema(recordSchema.getField(EventFieldNames.CONTENT_CLAIM).getSubFields());
                return new EventRecord(event, eventId, recordSchema, contentClaimSchema);
            }
        };

        try {
            writer.writeHeader(1L);
            writer.writeRecord(createEvent());
            writer.writeRecord(createEvent());
        } finally {
            writer.close();
        }
    } finally {
        tocWriter.close();
    }

    // Read the records in and make sure that they have the info that we expect.
    try (final InputStream in = new FileInputStream(journalFile);
         final TocReader tocReader = new StandardTocReader(tocFile);
         final RecordReader reader = createReader(in, journalFile.getName(), tocReader, 10000)) {

        for (int i = 0; i < 2; i++) {
            final StandardProvenanceEventRecord event = reader.nextRecord();
            assertNotNull(event);
            assertEquals(ProvenanceEventType.RECEIVE, event.getEventType());

            // We will still have a Map<String, String> for updated attributes because the
            // Provenance Event Builder will create an empty map.
            assertNotNull(event.getUpdatedAttributes());
            assertTrue(event.getUpdatedAttributes().isEmpty());
        }
    }
}
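The overridden writeHeader above emits a 4-byte length followed by the serialized schema. The read side implied by that layout looks roughly like the sketch below; that RecordSchema.readFrom is the deserialization counterpart to the writeTo call used above is an assumption about nifi-schema-utils, not something this snippet confirms.

// Hedged sketch of the matching read side: length prefix, then the schema bytes.
final DataInputStream dis = new DataInputStream(in);
final int schemaLength = dis.readInt();
final byte[] schemaBytes = new byte[schemaLength];
dis.readFully(schemaBytes);
// Recover the schema the writer embedded, without knowing it ahead of time.
final RecordSchema recoveredSchema = RecordSchema.readFrom(new ByteArrayInputStream(schemaBytes));

Embedding the schema in the header is what lets the reader tolerate removed fields: it deserializes against whatever schema the file declares rather than a compiled-in one.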
Use of org.apache.nifi.provenance.toc.StandardTocWriter in project nifi by apache.
In class TestSchemaRecordReaderWriter, method testPerformanceOfRandomAccessReads:
@Test
@Ignore("runs forever for performance analysis/profiling")
public void testPerformanceOfRandomAccessReads() throws Exception {
    journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testPerformanceOfRandomAccessReads.gz");
    tocFile = TocUtil.getTocFile(journalFile);

    // Write 100,000 events so that random-access reads have many blocks to seek across.
    try (final RecordWriter writer = createWriter(journalFile, new StandardTocWriter(tocFile, true, false), true, 1024 * 32)) {
        writer.writeHeader(0L);
        for (int i = 0; i < 100_000; i++) {
            writer.writeRecord(createEvent());
        }
    }

    final long[] eventIds = new long[] { 4, 80, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 40_000, 80_000, 99_000 };

    boolean loopForever = true;
    while (loopForever) {
        final long start = System.nanoTime();
        for (int i = 0; i < 1000; i++) {
            try (final InputStream in = new FileInputStream(journalFile);
                 final RecordReader reader = createReader(in, journalFile.getName(), new StandardTocReader(tocFile), 32 * 1024)) {

                for (final long id : eventIds) {
                    time(() -> {
                        reader.skipToEvent(id);
                        return reader.nextRecord();
                    }, id);
                }
            }
        }
        final long ms = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
        System.out.println(ms + " ms total");
    }
}
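Both performance tests call a time(...) helper that the snippets do not include. A plausible minimal version is sketched below; it is hypothetical, and the actual helper in the NiFi test classes may differ.

// Hypothetical helper: run a task and print its duration under the given label.
private <T> T time(final Callable<T> task, final Object label) throws Exception {
    final long start = System.nanoTime();
    final T result = task.call();
    final long micros = TimeUnit.NANOSECONDS.toMicros(System.nanoTime() - start);
    System.out.println(label + ": " + micros + " us");
    return result;
}

A Callable<T> fits both call sites seen in these snippets: the lambdas throw checked IOExceptions, which Callable.call() permits, and the label parameter accepts either the boxed event id or the "Fetch Events" string.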
Use of org.apache.nifi.provenance.toc.StandardTocWriter in project nifi by apache.
In class TestPartitionedWriteAheadEventStore, method testPerformanceOfAccessingEvents:
@Test
@Ignore
public void testPerformanceOfAccessingEvents() throws Exception {
    final RecordWriterFactory recordWriterFactory = (file, idGenerator, compressed, createToc) -> {
        final TocWriter tocWriter = createToc ? new StandardTocWriter(TocUtil.getTocFile(file), false, false) : null;
        return new EventIdFirstSchemaRecordWriter(file, idGenerator, tocWriter, compressed, 1024 * 1024, IdentifierLookup.EMPTY);
    };
    final RecordReaderFactory recordReaderFactory = (file, logs, maxChars) -> RecordReaders.newRecordReader(file, logs, maxChars);

    final PartitionedWriteAheadEventStore store = new PartitionedWriteAheadEventStore(createConfig(),
        recordWriterFactory, recordReaderFactory, EventReporter.NO_OP, new EventFileManager());
    store.initialize();
    assertEquals(-1, store.getMaxEventId());

    for (int i = 0; i < 100_000; i++) {
        final ProvenanceEventRecord event1 = createEvent();
        store.addEvents(Collections.singleton(event1));
    }

    final List<Long> eventIdList = Arrays.asList(4L, 80L, 1024L, 40_000L, 80_000L, 99_000L);
    while (true) {
        for (int i = 0; i < 100; i++) {
            time(() -> store.getEvents(eventIdList, EventAuthorizer.GRANT_ALL, EventTransformer.EMPTY_TRANSFORMER), "Fetch Events");
        }
        Thread.sleep(1000L);
    }
}
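The no-argument createConfig() here is another unshown helper. A minimal sketch of what it could look like follows, using the single partition name "1" that the testReindex snippet below stubs against; the directory layout and the RepositoryConfiguration.addStorageDirectory signature are assumptions.

// Hypothetical helper: a single-partition repository rooted under target/storage.
private RepositoryConfiguration createConfig() {
    final RepositoryConfiguration config = new RepositoryConfiguration();
    config.addStorageDirectory("1", new File("target/storage/" + UUID.randomUUID().toString()));
    return config;
}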
Use of org.apache.nifi.provenance.toc.StandardTocWriter in project nifi by apache.
In class TestWriteAheadStorePartition, method testReindex:
@Test
@SuppressWarnings("unchecked")
public void testReindex() throws IOException {
    final RepositoryConfiguration repoConfig = createConfig(1, "testReindex");
    repoConfig.setMaxEventFileCount(5);

    final String partitionName = repoConfig.getStorageDirectories().keySet().iterator().next();
    final File storageDirectory = repoConfig.getStorageDirectories().values().iterator().next();

    final RecordWriterFactory recordWriterFactory = (file, idGenerator, compressed, createToc) -> {
        final TocWriter tocWriter = createToc ? new StandardTocWriter(TocUtil.getTocFile(file), false, false) : null;
        return new EventIdFirstSchemaRecordWriter(file, idGenerator, tocWriter, compressed, 32 * 1024, IdentifierLookup.EMPTY);
    };
    final RecordReaderFactory recordReaderFactory = (file, logs, maxChars) -> RecordReaders.newRecordReader(file, logs, maxChars);

    final WriteAheadStorePartition partition = new WriteAheadStorePartition(storageDirectory, partitionName, repoConfig,
        recordWriterFactory, recordReaderFactory, new LinkedBlockingQueue<>(), new AtomicLong(0L), EventReporter.NO_OP);

    // Write 100 events (ids 0..99) across multiple event files.
    for (int i = 0; i < 100; i++) {
        partition.addEvents(Collections.singleton(TestUtil.createEvent()));
    }

    // Capture everything the partition asks the index to re-index.
    final Map<ProvenanceEventRecord, StorageSummary> reindexedEvents = new ConcurrentHashMap<>();
    final EventIndex eventIndex = Mockito.mock(EventIndex.class);
    Mockito.doAnswer(new Answer<Object>() {
        @Override
        public Object answer(final InvocationOnMock invocation) throws Throwable {
            final Map<ProvenanceEventRecord, StorageSummary> events = invocation.getArgumentAt(0, Map.class);
            reindexedEvents.putAll(events);
            return null;
        }
    }).when(eventIndex).reindexEvents(Mockito.anyMap());
    Mockito.doReturn(18L).when(eventIndex).getMinimumEventIdToReindex("1");

    partition.reindexLatestEvents(eventIndex);

    // Events 18 through 99 should have been re-indexed, in order.
    final List<Long> eventIdsReindexed = reindexedEvents.values().stream()
        .map(StorageSummary::getEventId)
        .sorted()
        .collect(Collectors.toList());
    assertEquals(82, eventIdsReindexed.size());
    for (int i = 0; i < eventIdsReindexed.size(); i++) {
        assertEquals(18 + i, eventIdsReindexed.get(i).intValue());
    }
}
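The expected count follows from the setup: 100 events with ids 0 through 99 are written, and the mocked index reports 18 as the minimum event id still needing indexing, so ids 18..99, i.e. 82 events, must be re-indexed. As a usage note, on Mockito 2+ the anonymous Answer above can be written more compactly as a lambda, with getArgument replacing the Mockito 1.x getArgumentAt used in the snippet:

// Equivalent stubbing as a lambda on the Mockito 2+ API.
Mockito.doAnswer(invocation -> {
    final Map<ProvenanceEventRecord, StorageSummary> events = invocation.getArgument(0);
    reindexedEvents.putAll(events);
    return null;
}).when(eventIndex).reindexEvents(Mockito.anyMap());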