Use of org.apache.nifi.provenance.toc.TocWriter in project NiFi by Apache.
From the class TestSchemaRecordReaderWriter, the method testFieldRemovedFromSchema:
@Test
public void testFieldRemovedFromSchema() throws IOException {
    final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);
    try {
        // Create a schema that has the fields modified
        final RecordSchema schemaV1 = ProvenanceEventSchema.PROVENANCE_EVENT_SCHEMA_V1;
        final List<RecordField> fields = new ArrayList<>(schemaV1.getFields());
        fields.remove(new SimpleRecordField(EventFieldNames.UPDATED_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        fields.remove(new SimpleRecordField(EventFieldNames.PREVIOUS_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        final RecordSchema recordSchema = new RecordSchema(fields);

        // Create a record writer whose schema does not contain updated attributes or previous attributes.
        // This means that we must also override the method that writes out attributes so that we are able
        // to avoid actually writing them out.
        final ByteArraySchemaRecordWriter writer = new ByteArraySchemaRecordWriter(journalFile, idGenerator, tocWriter, false, 0) {
            @Override
            public void writeHeader(long firstEventId, DataOutputStream out) throws IOException {
                final ByteArrayOutputStream baos = new ByteArrayOutputStream();
                recordSchema.writeTo(baos);
                out.writeInt(baos.size());
                baos.writeTo(out);
            }

            @Override
            protected Record createRecord(final ProvenanceEventRecord event, final long eventId) {
                final RecordSchema contentClaimSchema = new RecordSchema(recordSchema.getField(EventFieldNames.CONTENT_CLAIM).getSubFields());
                return new EventRecord(event, eventId, recordSchema, contentClaimSchema);
            }
        };

        try {
            writer.writeHeader(1L);
            writer.writeRecord(createEvent());
            writer.writeRecord(createEvent());
        } finally {
            writer.close();
        }
    } finally {
        tocWriter.close();
    }

    // Read the records in and make sure that they have the info that we expect.
    try (final InputStream in = new FileInputStream(journalFile);
        final TocReader tocReader = new StandardTocReader(tocFile);
        final RecordReader reader = createReader(in, journalFile.getName(), tocReader, 10000)) {

        for (int i = 0; i < 2; i++) {
            final StandardProvenanceEventRecord event = reader.nextRecord();
            assertNotNull(event);
            assertEquals(ProvenanceEventType.RECEIVE, event.getEventType());

            // We will still have a Map<String, String> for updated attributes because the
            // Provenance Event Builder will create an empty map.
            assertNotNull(event.getUpdatedAttributes());
            assertTrue(event.getUpdatedAttributes().isEmpty());
        }
    }
}
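The overridden writeHeader above length-prefixes the serialized schema, which is what allows a reader to recover whatever (possibly reduced) schema the writer used. A minimal sketch of the reader side of that contract, assuming RecordSchema.readFrom(InputStream) from NiFi's repository-schema module is available:

// Sketch only: read back the length-prefixed schema written by writeHeader() above.
// Assumes org.apache.nifi.repository.schema.RecordSchema#readFrom(InputStream) exists.
final DataInputStream dis = new DataInputStream(in);
final int schemaLength = dis.readInt();              // matches out.writeInt(baos.size()) above
final byte[] schemaBytes = new byte[schemaLength];
dis.readFully(schemaBytes);
final RecordSchema writtenSchema = RecordSchema.readFrom(new ByteArrayInputStream(schemaBytes));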
Use of org.apache.nifi.provenance.toc.TocWriter in project NiFi by Apache.
From the class TestSchemaRecordReaderWriter, the method testWritePerformance:
@Test
@Ignore("For local testing only")
public void testWritePerformance() throws IOException {
    // This is a simple micro-benchmarking test so that we can determine how fast the serialization/deserialization is before
    // making significant changes. This allows us to ensure that changes that we make do not have significant adverse effects
    // on performance of the repository.
    final ProvenanceEventRecord event = createEvent();
    final TocWriter tocWriter = new NopTocWriter();
    final int numEvents = 10_000_000;
    final long startNanos = System.nanoTime();
    try (final OutputStream nullOut = new NullOutputStream();
        final RecordWriter writer = new ByteArraySchemaRecordWriter(nullOut, "out", idGenerator, tocWriter, false, 0)) {

        writer.writeHeader(0L);
        for (int i = 0; i < numEvents; i++) {
            writer.writeRecord(event);
        }
    }
    final long nanos = System.nanoTime() - startNanos;
    final long millis = TimeUnit.NANOSECONDS.toMillis(nanos);
    System.out.println("Took " + millis + " millis to write " + numEvents + " events");
}
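The benchmark only prints elapsed milliseconds; a throughput figure is often more useful when comparing serialization changes. A small sketch deriving it from the same measurements (nanos and numEvents are the local variables from the test above):

// Sketch only: derive throughput from the values measured in testWritePerformance().
final double seconds = nanos / 1_000_000_000.0;
final double eventsPerSecond = numEvents / seconds;
System.out.println(String.format("Roughly %.0f events/sec", eventsPerSecond));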
Use of org.apache.nifi.provenance.toc.TocWriter in project NiFi by Apache.
From the class TestPartitionedWriteAheadEventStore, the method testPerformanceOfAccessingEvents:
@Test
@Ignore
public void testPerformanceOfAccessingEvents() throws Exception {
    final RecordWriterFactory recordWriterFactory = (file, idGenerator, compressed, createToc) -> {
        final TocWriter tocWriter = createToc ? new StandardTocWriter(TocUtil.getTocFile(file), false, false) : null;
        return new EventIdFirstSchemaRecordWriter(file, idGenerator, tocWriter, compressed, 1024 * 1024, IdentifierLookup.EMPTY);
    };
    final RecordReaderFactory recordReaderFactory = (file, logs, maxChars) -> RecordReaders.newRecordReader(file, logs, maxChars);
    final PartitionedWriteAheadEventStore store = new PartitionedWriteAheadEventStore(createConfig(), recordWriterFactory, recordReaderFactory, EventReporter.NO_OP, new EventFileManager());
    store.initialize();
    assertEquals(-1, store.getMaxEventId());

    for (int i = 0; i < 100_000; i++) {
        final ProvenanceEventRecord event1 = createEvent();
        store.addEvents(Collections.singleton(event1));
    }

    final List<Long> eventIdList = Arrays.asList(4L, 80L, 1024L, 40_000L, 80_000L, 99_000L);
    while (true) {
        for (int i = 0; i < 100; i++) {
            time(() -> store.getEvents(eventIdList, EventAuthorizer.GRANT_ALL, EventTransformer.EMPTY_TRANSFORMER), "Fetch Events");
        }
        Thread.sleep(1000L);
    }
}
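The time(...) call above is a private helper of the test class that is not shown on this page. A minimal, hypothetical sketch of such a helper (name, signature, and output format are assumptions, not the actual NiFi code):

// Hypothetical sketch of a timing helper like the time(...) call used above.
// Requires java.util.concurrent.Callable and java.util.concurrent.TimeUnit.
private void time(final Callable<?> task, final String label) throws Exception {
    final long start = System.nanoTime();
    task.call();
    final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
    System.out.println(label + " took " + millis + " millis");
}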
Use of org.apache.nifi.provenance.toc.TocWriter in project NiFi by Apache.
From the class TestWriteAheadStorePartition, the method testReindex:
@Test
@SuppressWarnings("unchecked")
public void testReindex() throws IOException {
    final RepositoryConfiguration repoConfig = createConfig(1, "testReindex");
    repoConfig.setMaxEventFileCount(5);
    final String partitionName = repoConfig.getStorageDirectories().keySet().iterator().next();
    final File storageDirectory = repoConfig.getStorageDirectories().values().iterator().next();

    final RecordWriterFactory recordWriterFactory = (file, idGenerator, compressed, createToc) -> {
        final TocWriter tocWriter = createToc ? new StandardTocWriter(TocUtil.getTocFile(file), false, false) : null;
        return new EventIdFirstSchemaRecordWriter(file, idGenerator, tocWriter, compressed, 32 * 1024, IdentifierLookup.EMPTY);
    };
    final RecordReaderFactory recordReaderFactory = (file, logs, maxChars) -> RecordReaders.newRecordReader(file, logs, maxChars);
    final WriteAheadStorePartition partition = new WriteAheadStorePartition(storageDirectory, partitionName, repoConfig, recordWriterFactory, recordReaderFactory, new LinkedBlockingQueue<>(), new AtomicLong(0L), EventReporter.NO_OP);

    for (int i = 0; i < 100; i++) {
        partition.addEvents(Collections.singleton(TestUtil.createEvent()));
    }

    final Map<ProvenanceEventRecord, StorageSummary> reindexedEvents = new ConcurrentHashMap<>();
    final EventIndex eventIndex = Mockito.mock(EventIndex.class);
    Mockito.doAnswer(new Answer<Object>() {
        @Override
        public Object answer(final InvocationOnMock invocation) throws Throwable {
            final Map<ProvenanceEventRecord, StorageSummary> events = invocation.getArgumentAt(0, Map.class);
            reindexedEvents.putAll(events);
            return null;
        }
    }).when(eventIndex).reindexEvents(Mockito.anyMap());
    Mockito.doReturn(18L).when(eventIndex).getMinimumEventIdToReindex("1");

    partition.reindexLatestEvents(eventIndex);

    final List<Long> eventIdsReindexed = reindexedEvents.values().stream().map(StorageSummary::getEventId).sorted().collect(Collectors.toList());
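    // 100 events were written with IDs 0 through 99, and the mocked index reports 18 as the
    // minimum event ID still needing reindexing, so events 18-99 (82 events) should be reindexed.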
    assertEquals(82, eventIdsReindexed.size());
    for (int i = 0; i < eventIdsReindexed.size(); i++) {
        assertEquals(18 + i, eventIdsReindexed.get(i).intValue());
    }
}
Use of org.apache.nifi.provenance.toc.TocWriter in project NiFi by Apache.
From the class EventIdFirstSchemaRecordWriter, the method writeRecord:
@Override
public StorageSummary writeRecord(final ProvenanceEventRecord record) throws IOException {
    if (isDirty()) {
        throw new IOException("Cannot update Provenance Repository because this Record Writer has already failed to write to the Repository");
    }

    final long lockStart;
    final long writeStart;
    final long startBytes;
    final long endBytes;
    final long recordIdentifier;

    final long serializeStart = System.nanoTime();
    final ByteArrayDataOutputStream bados = streamCache.checkOut();
    try {
        writeRecord(record, 0L, bados.getDataOutputStream());

        lockStart = System.nanoTime();
        synchronized (this) {
            writeStart = System.nanoTime();
            try {
                recordIdentifier = record.getEventId() == -1L ? getIdGenerator().getAndIncrement() : record.getEventId();
                startBytes = getBytesWritten();

                ensureStreamState(recordIdentifier, startBytes);

                final DataOutputStream out = getBufferedOutputStream();
                final int recordIdOffset = (int) (recordIdentifier - firstEventId);
                out.writeInt(recordIdOffset);

                final ByteArrayOutputStream baos = bados.getByteArrayOutputStream();
                out.writeInt(baos.size());
                baos.writeTo(out);

                recordCount.incrementAndGet();
                endBytes = getBytesWritten();
            } catch (final IOException ioe) {
                markDirty();
                throw ioe;
            }
        }
    } finally {
        streamCache.checkIn(bados);
    }
    if (logger.isDebugEnabled()) {
        // Collect stats and periodically dump them if debug logging is enabled.
        final long writeNanos = System.nanoTime() - writeStart;
        writeTimes.add(new TimestampedLong(writeNanos));

        final long serializeNanos = lockStart - serializeStart;
        serializeTimes.add(new TimestampedLong(serializeNanos));

        final long lockNanos = writeStart - lockStart;
        lockTimes.add(new TimestampedLong(lockNanos));
        bytesWritten.add(new TimestampedLong(endBytes - startBytes));

        final long recordCount = totalRecordCount.incrementAndGet();
        if (recordCount % 1_000_000 == 0) {
            final long sixtySecondsAgo = System.currentTimeMillis() - 60000L;
            final Long writeNanosLast60 = writeTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long lockNanosLast60 = lockTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long serializeNanosLast60 = serializeTimes.getAggregateValue(sixtySecondsAgo).getValue();
            final Long bytesWrittenLast60 = bytesWritten.getAggregateValue(sixtySecondsAgo).getValue();
            logger.debug("In the last 60 seconds, have spent {} millis writing to file ({} MB), {} millis waiting on synchronize block, {} millis serializing events",
                TimeUnit.NANOSECONDS.toMillis(writeNanosLast60), bytesWrittenLast60 / 1024 / 1024, TimeUnit.NANOSECONDS.toMillis(lockNanosLast60), TimeUnit.NANOSECONDS.toMillis(serializeNanosLast60));
        }
    }

    final long serializedLength = endBytes - startBytes;
    final TocWriter tocWriter = getTocWriter();
    final Integer blockIndex = tocWriter == null ? null : tocWriter.getCurrentBlockIndex();
    final File file = getFile();
    final String storageLocation = file.getParentFile().getName() + "/" + file.getName();
    return new StorageSummary(recordIdentifier, storageLocation, blockIndex, serializedLength, endBytes);
}
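The structure above serializes the event into a per-writer byte buffer before entering the synchronized block, so only the cheap copy to the shared stream holds the lock. A minimal, self-contained sketch of that pattern (the class and method names are hypothetical, not NiFi API):

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Hypothetical illustration of the serialize-outside-the-lock pattern used by writeRecord().
class SerializeThenLockWriter {
    private final DataOutputStream shared;

    SerializeThenLockWriter(final DataOutputStream shared) {
        this.shared = shared;
    }

    void write(final String record) throws IOException {
        // Expensive work (serialization) happens with no lock held.
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        final DataOutputStream bufferOut = new DataOutputStream(buffer);
        bufferOut.writeUTF(record); // stand-in for the real event serialization
        bufferOut.flush();

        // Only the cheap, length-prefixed copy into the shared stream is synchronized.
        synchronized (this) {
            shared.writeInt(buffer.size());
            buffer.writeTo(shared);
        }
    }
}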