Use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.
The class TestPersistentProvenanceRepository, method checkJournalRecords.
private long checkJournalRecords(final File storageDir, final boolean exact) throws IOException {
    final File[] storageFiles = storageDir.listFiles();
    assertNotNull(storageFiles);

    long counter = 0;
    for (final File file : storageFiles) {
        if (!file.isFile()) {
            continue;
        }

        // Read every event in the file and verify that event IDs are exactly
        // sequential (exact == true) or at least never decrease (exact == false).
        try (final RecordReader reader = RecordReaders.newRecordReader(file, null, 2048)) {
            ProvenanceEventRecord record;
            while ((record = reader.nextRecord()) != null) {
                if (exact) {
                    assertEquals(counter++, record.getEventId());
                } else {
                    assertTrue(counter++ <= record.getEventId());
                }
            }
        }
    }
    return counter;
}
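A test that rolls over several journal files would typically call this helper with exact set to false, since events merged from concurrently written journals are only guaranteed to have non-decreasing IDs, not gap-free ones. A minimal sketch of such a caller, assuming the repo and config fields used elsewhere in this test class; expectedEventCount is an illustrative placeholder:

// Hypothetical caller: after the repository rolls over, every journal in the
// storage directory should be readable and IDs should never decrease.
repo.waitForRollover();
final File storageDir = config.getStorageDirectories().values().iterator().next();
final long recordsRead = checkJournalRecords(storageDir, false);
assertEquals(expectedEventCount, recordsRead); // expectedEventCount: however many events the test registered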
Use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.
The class TestPersistentProvenanceRepository, method testMergeJournals.
@Test
public void testMergeJournals() throws IOException, InterruptedException {
    assumeFalse(isWindowsEnvironment());

    final RepositoryConfiguration config = createConfiguration();
    config.setMaxEventFileLife(3, TimeUnit.SECONDS);
    repo = new PersistentProvenanceRepository(config, DEFAULT_ROLLOVER_MILLIS);
    repo.initialize(getEventReporter(), null, null, IdentifierLookup.EMPTY);

    final Map<String, String> attributes = new HashMap<>();
    attributes.put("uuid", "12345678-0000-0000-0000-012345678912");

    final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventTime(System.currentTimeMillis());
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    builder.fromFlowFile(createFlowFile(3L, 3000L, attributes));
    builder.setComponentId("1234");
    builder.setComponentType("dummy processor");
    final ProvenanceEventRecord record = builder.build();

    // Register the same event 10,000 times from 10 threads so that several
    // journal files are written concurrently and must be merged on rollover.
    final ExecutorService exec = Executors.newFixedThreadPool(10);
    for (int i = 0; i < 10000; i++) {
        exec.submit(() -> repo.registerEvent(record));
    }

    // Make sure every registration has completed before waiting for rollover.
    exec.shutdown();
    assertTrue(exec.awaitTermination(10, TimeUnit.SECONDS));

    repo.waitForRollover();

    // The merged event file must contain all 10,000 events with strictly
    // sequential event IDs, starting from 0.
    final File storageDir = config.getStorageDirectories().values().iterator().next();
    final File[] storageFiles = storageDir.listFiles();
    assertNotNull(storageFiles);

    long counter = 0;
    for (final File file : storageFiles) {
        if (file.isFile()) {
            try (final RecordReader reader = RecordReaders.newRecordReader(file, null, 2048)) {
                ProvenanceEventRecord r;
                while ((r = reader.nextRecord()) != null) {
                    assertEquals(counter++, r.getEventId());
                }
            }
        }
    }
    assertEquals(10000, counter);
}
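The reader is not limited to sequential scans: the RecordReader interface also exposes skipToEvent, which seeks close to a given event ID (using the table of contents when one is present) before reading. A minimal sketch against the merged file produced above; the .prov name filter and the chosen event ID are illustrative assumptions:

// Hypothetical: open the merged event file and jump straight to the last
// event rather than scanning 10,000 records. Arguments mirror the test above:
// no alternate provenance files, 2048 max attribute characters.
final File eventFile = storageDir.listFiles((dir, name) -> name.endsWith(".prov"))[0];
try (final RecordReader reader = RecordReaders.newRecordReader(eventFile, null, 2048)) {
    reader.skipToEvent(9999L);
    final ProvenanceEventRecord event = reader.nextRecord();
    assertNotNull(event);
    assertEquals(9999L, event.getEventId());
}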
Use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.
The class TestSchemaRecordReaderWriter, method testFieldRemovedFromSchema.
@Test
public void testFieldRemovedFromSchema() throws IOException {
    try (final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false)) {
        // Create a schema that has the updated/previous attribute fields removed
        final RecordSchema schemaV1 = ProvenanceEventSchema.PROVENANCE_EVENT_SCHEMA_V1;
        final List<RecordField> fields = new ArrayList<>(schemaV1.getFields());
        fields.remove(new SimpleRecordField(EventFieldNames.UPDATED_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        fields.remove(new SimpleRecordField(EventFieldNames.PREVIOUS_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        final RecordSchema recordSchema = new RecordSchema(fields);

        // Create a record writer whose schema does not contain updated attributes or
        // previous attributes. This means that we must also override writeHeader and
        // createRecord so that the removed fields are never actually written out.
        final ByteArraySchemaRecordWriter writer = new ByteArraySchemaRecordWriter(journalFile, idGenerator, tocWriter, false, 0) {
            @Override
            public void writeHeader(final long firstEventId, final DataOutputStream out) throws IOException {
                // Embed the reduced schema: a 4-byte length followed by the schema bytes.
                final ByteArrayOutputStream baos = new ByteArrayOutputStream();
                recordSchema.writeTo(baos);
                out.writeInt(baos.size());
                baos.writeTo(out);
            }

            @Override
            protected Record createRecord(final ProvenanceEventRecord event, final long eventId) {
                final RecordSchema contentClaimSchema = new RecordSchema(recordSchema.getField(EventFieldNames.CONTENT_CLAIM).getSubFields());
                return new EventRecord(event, eventId, recordSchema, contentClaimSchema);
            }
        };

        try {
            writer.writeHeader(1L);
            writer.writeRecord(createEvent());
            writer.writeRecord(createEvent());
        } finally {
            writer.close();
        }
    }

    // Read the records back and make sure that they have the info that we expect.
    try (final InputStream in = new FileInputStream(journalFile);
         final TocReader tocReader = new StandardTocReader(tocFile);
         final RecordReader reader = createReader(in, journalFile.getName(), tocReader, 10000)) {
        for (int i = 0; i < 2; i++) {
            final StandardProvenanceEventRecord event = reader.nextRecord();
            assertNotNull(event);
            assertEquals(ProvenanceEventType.RECEIVE, event.getEventType());
            // We will still have a Map<String, String> for updated attributes because
            // the Provenance Event Builder will create an empty map.
            assertNotNull(event.getUpdatedAttributes());
            assertTrue(event.getUpdatedAttributes().isEmpty());
        }
    }
}
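The overridden writeHeader above embeds the schema as a four-byte length followed by the serialized schema bytes, which is what a schema-aware reader consumes when the file is opened. A minimal sketch of the reading side, assuming the stream is already positioned at that header block (the base writer emits a serialization name and version before it); readEmbeddedSchema is a hypothetical helper name:

// Hypothetical counterpart to the writeHeader override: recover the schema
// that was embedded in the file.
static RecordSchema readEmbeddedSchema(final DataInputStream in) throws IOException {
    final int schemaLength = in.readInt();      // written by out.writeInt(baos.size())
    final byte[] schemaBytes = new byte[schemaLength];
    in.readFully(schemaBytes);                  // written by baos.writeTo(out)
    return RecordSchema.readFrom(new ByteArrayInputStream(schemaBytes));
}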
Use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.
The class TestSchemaRecordReaderWriter, method testAddOneRecordReadTwice.
@Test
public void testAddOneRecordReadTwice() throws IOException {
    // Extend the schema with an extra field and supply a value for it, so that
    // the writer produced by createSchemaWriter writes one enriched record.
    final RecordField unitTestField = new SimpleRecordField("Unit Test Field", FieldType.STRING, Repetition.EXACTLY_ONE);
    final Consumer<List<RecordField>> schemaModifier = fields -> fields.add(unitTestField);

    final Map<RecordField, Object> toAdd = new HashMap<>();
    toAdd.put(unitTestField, "hello");

    try (final ByteArraySchemaRecordWriter writer = createSchemaWriter(schemaModifier, toAdd)) {
        writer.writeHeader(1L);
        writer.writeRecord(createEvent());
    }

    // Only one record was written, so the first read succeeds and the second
    // returns null to signal end-of-stream.
    try (final InputStream in = new FileInputStream(journalFile);
         final TocReader tocReader = new StandardTocReader(tocFile);
         final RecordReader reader = createReader(in, journalFile.getName(), tocReader, 10000)) {
        final ProvenanceEventRecord firstEvent = reader.nextRecord();
        assertNotNull(firstEvent);

        final ProvenanceEventRecord secondEvent = reader.nextRecord();
        assertNull(secondEvent);
    }
}
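Because the null return from nextRecord is the only end-of-stream signal, callers that do not know the record count up front drain the journal in a loop. A minimal sketch using the same journalFile, tocFile, and createReader helper as above:

// Hypothetical drain loop: count events until nextRecord returns null.
long eventCount = 0;
try (final InputStream in = new FileInputStream(journalFile);
     final TocReader tocReader = new StandardTocReader(tocFile);
     final RecordReader reader = createReader(in, journalFile.getName(), tocReader, 10000)) {
    while (reader.nextRecord() != null) {
        eventCount++;
    }
}
assertEquals(1, eventCount); // the test above wrote exactly one record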
Use of org.apache.nifi.provenance.serialization.RecordReader in project nifi by apache.
The class TestSchemaRecordReaderWriter, method testPerformanceOfRandomAccessReads.
@Test
@Ignore("runs forever for performance analysis/profiling")
public void testPerformanceOfRandomAccessReads() throws Exception {
    journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testPerformanceOfRandomAccessReads.gz");
    tocFile = TocUtil.getTocFile(journalFile);

    // Write 100,000 events so that random-access reads have to seek across
    // many compressed blocks.
    try (final RecordWriter writer = createWriter(journalFile, new StandardTocWriter(tocFile, true, false), true, 1024 * 32)) {
        writer.writeHeader(0L);
        for (int i = 0; i < 100_000; i++) {
            writer.writeRecord(createEvent());
        }
    }

    // A mix of adjacent and far-apart event IDs, exercising both short skips
    // within a block and long seeks via the table of contents.
    final long[] eventIds = new long[] {4, 80, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 40_000, 80_000, 99_000};

    while (true) {
        final long start = System.nanoTime();
        for (int i = 0; i < 1000; i++) {
            try (final InputStream in = new FileInputStream(journalFile);
                 final RecordReader reader = createReader(in, journalFile.getName(), new StandardTocReader(tocFile), 32 * 1024)) {
                for (final long id : eventIds) {
                    time(() -> {
                        reader.skipToEvent(id);
                        return reader.nextRecord();
                    }, id);
                }
            }
        }
        final long ms = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
        System.out.println(ms + " ms total");
    }
}
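The time helper called above belongs to the surrounding test class rather than to the RecordReader API. A minimal sketch of what such a helper might look like, using java.util.concurrent.Callable; the signature is an assumption inferred from the call sites above:

// Hypothetical timing helper matching the calls above: run the task, report
// how long the skip-and-read took for the given event ID, and return the result.
private <T> T time(final Callable<T> task, final long eventId) throws Exception {
    final long start = System.nanoTime();
    final T result = task.call();
    final long micros = TimeUnit.NANOSECONDS.toMicros(System.nanoTime() - start);
    System.out.println("Took " + micros + " micros to skip to and read event " + eventId);
    return result;
}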