use of org.apache.nifi.provenance.toc.TocReader in project nifi by apache.
the class TestEventIdFirstSchemaRecordReaderWriter method testContentClaimUnchanged.
/**
 * Writes a single RECEIVE event whose previous and current content claims are
 * identical, reads it back, and asserts that every claim field (container,
 * section, identifier, offset and size) is recovered for both the previous and
 * the current claim.
 *
 * @throws IOException if the journal or its table-of-contents file cannot be written or read
 */
@Test
public void testContentClaimUnchanged() throws IOException {
    final File journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite.gz");
    final File tocFile = TocUtil.getTocFile(journalFile);
    final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);

    final Map<String, String> attributes = new HashMap<>();
    attributes.put("filename", "1.txt");
    attributes.put("uuid", UUID.randomUUID().toString());

    final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder();
    builder.setEventTime(System.currentTimeMillis());
    builder.setEventType(ProvenanceEventType.RECEIVE);
    builder.setTransitUri("nifi://unit-test");
    builder.fromFlowFile(TestUtil.createFlowFile(3L, 3000L, attributes));
    builder.setComponentId("1234");
    builder.setComponentType("dummy processor");
    // Previous and current claim are deliberately the same: the test is about an unchanged claim.
    builder.setPreviousContentClaim("container-1", "section-1", "identifier-1", 1L, 1L);
    builder.setCurrentContentClaim("container-1", "section-1", "identifier-1", 1L, 1L);
    final ProvenanceEventRecord record = builder.build();

    // try-with-resources so the writer is closed even if writing the header or record fails.
    try (final RecordWriter writer = createWriter(journalFile, tocWriter, true, 8192)) {
        writer.writeHeader(1L);
        writer.writeRecord(record);
    }

    try (final FileInputStream fis = new FileInputStream(journalFile);
         final TocReader tocReader = new StandardTocReader(tocFile);
         final RecordReader reader = createReader(fis, journalFile.getName(), tocReader, 2048)) {
        assertEquals(0, reader.getBlockIndex());
        reader.skipToBlock(0);

        final StandardProvenanceEventRecord recovered = reader.nextRecord();
        assertNotNull(recovered);
        assertEquals("nifi://unit-test", recovered.getTransitUri());

        assertEquals("container-1", recovered.getPreviousContentClaimContainer());
        assertEquals("container-1", recovered.getContentClaimContainer());

        assertEquals("section-1", recovered.getPreviousContentClaimSection());
        assertEquals("section-1", recovered.getContentClaimSection());

        assertEquals("identifier-1", recovered.getPreviousContentClaimIdentifier());
        assertEquals("identifier-1", recovered.getContentClaimIdentifier());

        assertEquals(1L, recovered.getPreviousContentClaimOffset().longValue());
        assertEquals(1L, recovered.getContentClaimOffset().longValue());

        assertEquals(1L, recovered.getPreviousFileSize().longValue());
        // Size of the current claim; this slot previously re-checked the claim offset a second time.
        assertEquals(1L, recovered.getFileSize());

        // Only one record was written, so the reader must now be exhausted.
        assertNull(reader.nextRecord());
    }

    FileUtils.deleteFile(journalFile.getParentFile(), true);
}
use of org.apache.nifi.provenance.toc.TocReader in project nifi by apache.
the class TestSchemaRecordReaderWriter method testFieldAddedToSchema.
/**
 * Writes two events through a schema writer that is given a schema modifier
 * appending a "Unit Test Field" string field (with the value "hello" supplied
 * for it), then reads the journal back and checks that both events are
 * returned with the expected component id, event type and non-empty updated
 * attributes.
 *
 * @throws IOException if writing or reading the journal fails
 */
@Test
public void testFieldAddedToSchema() throws IOException {
    final RecordField extraField = new SimpleRecordField("Unit Test Field", FieldType.STRING, Repetition.EXACTLY_ONE);

    final Map<RecordField, Object> extraValues = new HashMap<>();
    extraValues.put(extraField, "hello");

    final Consumer<List<RecordField>> appendExtraField = schemaFields -> schemaFields.add(extraField);

    try (final ByteArraySchemaRecordWriter schemaWriter = createSchemaWriter(appendExtraField, extraValues)) {
        schemaWriter.writeHeader(1L);
        schemaWriter.writeRecord(createEvent());
        schemaWriter.writeRecord(createEvent());
    }

    try (final InputStream journalIn = new FileInputStream(journalFile);
         final TocReader toc = new StandardTocReader(tocFile);
         final RecordReader recordReader = createReader(journalIn, journalFile.getName(), toc, 10000)) {

        // Two events were written above, so exactly two are read back and checked.
        int remaining = 2;
        while (remaining > 0) {
            final StandardProvenanceEventRecord recovered = recordReader.nextRecord();
            assertNotNull(recovered);
            assertEquals("1234", recovered.getComponentId());
            assertEquals(ProvenanceEventType.RECEIVE, recovered.getEventType());
            assertNotNull(recovered.getUpdatedAttributes());
            assertFalse(recovered.getUpdatedAttributes().isEmpty());
            remaining--;
        }
    }
}
use of org.apache.nifi.provenance.toc.TocReader in project nifi by apache.
the class TestSchemaRecordReaderWriter method testReadPerformance.
/**
 * Local micro-benchmark for the schema-based record reader: serializes one
 * header and one event once, then times how long it takes to read that same
 * record 10,000,000 times from a looping stream and prints the elapsed time
 * and throughput.
 *
 * @throws IOException if serializing or reading the event fails
 * @throws InterruptedException declared by the original signature; nothing in this body throws it
 */
@Test
@Ignore("For local performance testing only")
public void testReadPerformance() throws IOException, InterruptedException {
    // Simple micro-benchmark used to measure serialization/deserialization speed before making
    // significant changes, so that such changes can be checked for adverse effects on the
    // performance of the repository.
    final ProvenanceEventRecord event = createEvent();

    // Kept as a typed local (rather than a bare null literal) so the reader constructor call below is unambiguous.
    final TocReader tocReader = null;

    // Capture the bytes produced by writing only the header.
    final byte[] header;
    try (final ByteArrayOutputStream headerBuffer = new ByteArrayOutputStream();
         final DataOutputStream headerStream = new DataOutputStream(headerBuffer)) {
        final RecordWriter headerWriter = new ByteArraySchemaRecordWriter(headerStream, "out", idGenerator, null, false, 0);
        headerWriter.writeHeader(1L);
        header = headerBuffer.toByteArray();
    }

    // Capture the bytes of a single record: write the header, discard it, then write the event.
    final byte[] serializedRecord;
    try (final ByteArrayOutputStream recordBuffer = new ByteArrayOutputStream();
         final RecordWriter recordWriter = new ByteArraySchemaRecordWriter(recordBuffer, "out", idGenerator, null, false, 0)) {
        recordWriter.writeHeader(1L);
        recordBuffer.reset();

        recordWriter.writeRecord(event);
        recordWriter.flush();
        serializedRecord = recordBuffer.toByteArray();
    }

    final int numEvents = 10_000_000;
    final long totalRecordBytes = (long) serializedRecord.length * (long) numEvents;

    final long startNanos = System.nanoTime();
    try (final InputStream loopingIn = new LoopingInputStream(header, serializedRecord);
         final RecordReader recordReader = new ByteArraySchemaRecordReader(loopingIn, "filename", tocReader, 100000)) {
        int remaining = numEvents;
        while (remaining > 0) {
            recordReader.nextRecord();
            remaining--;
        }
    }
    final long elapsedNanos = System.nanoTime() - startNanos;

    final long millis = TimeUnit.NANOSECONDS.toMillis(elapsedNanos);
    final double seconds = millis / 1000D;
    final long bytesPerSecond = (long) (totalRecordBytes / seconds);
    final long megaBytesPerSecond = bytesPerSecond / 1024 / 1024;

    System.out.println("Took " + millis + " millis to read " + numEvents + " events or " + megaBytesPerSecond + " MB/sec");
}
use of org.apache.nifi.provenance.toc.TocReader in project nifi by apache.
the class TestStandardRecordReaderWriter method testReadPerformance.
/**
 * Local micro-benchmark for the standard record reader: builds a header by
 * hand, serializes one event once, then times how long it takes to read that
 * same record 10,000,000 times from a looping stream and prints the elapsed
 * time.
 *
 * @throws IOException if serializing or reading the event fails
 */
@Test
@Ignore("For local testing only")
public void testReadPerformance() throws IOException {
    // Simple micro-benchmark used to measure serialization/deserialization speed before making
    // significant changes, so that such changes can be checked for adverse effects on the
    // performance of the repository.
    final ProvenanceEventRecord event = createEvent();

    // Kept as a typed local (rather than a bare null literal) so the reader constructor call below is unambiguous.
    final TocReader tocReader = null;

    // Hand-written header: the repository class name followed by the integer 9.
    final byte[] header;
    try (final ByteArrayOutputStream headerBuffer = new ByteArrayOutputStream();
         final DataOutputStream headerStream = new DataOutputStream(headerBuffer)) {
        headerStream.writeUTF(PersistentProvenanceRepository.class.getName());
        headerStream.writeInt(9);
        header = headerBuffer.toByteArray();
    }

    // Capture the bytes of a single record: write the header, discard it, then write the event.
    final byte[] serializedRecord;
    try (final ByteArrayOutputStream recordBuffer = new ByteArrayOutputStream();
         final StandardRecordWriter recordWriter = new StandardRecordWriter(recordBuffer, "devnull", idGenerator, null, false, 0)) {
        recordWriter.writeHeader(1L);
        recordBuffer.reset();

        recordWriter.writeRecord(event);
        recordWriter.flush();
        serializedRecord = recordBuffer.toByteArray();
    }

    final int numEvents = 10_000_000;

    final long startNanos = System.nanoTime();
    try (final InputStream loopingIn = new LoopingInputStream(header, serializedRecord);
         final RecordReader recordReader = new StandardRecordReader(loopingIn, "filename", tocReader, 100000)) {
        int remaining = numEvents;
        while (remaining > 0) {
            recordReader.nextRecord();
            remaining--;
        }
    }
    final long elapsedNanos = System.nanoTime() - startNanos;

    final long millis = TimeUnit.NANOSECONDS.toMillis(elapsedNanos);
    System.out.println("Took " + millis + " millis to read " + numEvents + " events");
}
use of org.apache.nifi.provenance.toc.TocReader in project nifi by apache.
the class PersistentProvenanceRepository method getEvents.
/**
 * Collects up to {@code maxRecords} events whose id is at least
 * {@code firstRecordId} and which {@code isAuthorized} accepts for the given
 * user, reading the files returned by {@code getPathsForId} in order.
 *
 * <p>Files that are missing or end prematurely are skipped silently; any other
 * I/O failure on a file is logged and reported through the event reporter,
 * after which reading continues with the next file.
 *
 * @param firstRecordId the lowest event id to include
 * @param maxRecords the maximum number of events to return
 * @param user the user passed to the authorization check for each event
 * @return the matching events in the order read; empty if no files are found for the id
 * @throws IOException declared by the signature; per-file read failures are handled inside the loop
 */
@Override
public List<ProvenanceEventRecord> getEvents(final long firstRecordId, final int maxRecords, final NiFiUser user) throws IOException {
    final List<ProvenanceEventRecord> matched = new ArrayList<>(maxRecords);

    final List<Path> candidateFiles = getPathsForId(firstRecordId);
    if (candidateFiles == null || candidateFiles.isEmpty()) {
        return matched;
    }

    for (final Path eventFile : candidateFiles) {
        try (RecordReader reader = RecordReaders.newRecordReader(eventFile.toFile(), getAllLogFiles(), maxAttributeChars)) {
            // While nothing has been collected yet, ask the table of contents (when there is one)
            // which block holds the first wanted event and skip straight to that block.
            if (matched.isEmpty()) {
                final TocReader toc = reader.getTocReader();
                final Integer startBlock = (toc == null) ? null : toc.getBlockIndexForEventId(firstRecordId);
                if (startBlock != null) {
                    reader.skipToBlock(startBlock);
                }
            }

            while (matched.size() < maxRecords) {
                final StandardProvenanceEventRecord candidate = reader.nextRecord();
                if (candidate == null) {
                    break;
                }

                if (candidate.getEventId() < firstRecordId) {
                    continue;
                }

                if (isAuthorized(candidate, user)) {
                    matched.add(candidate);
                }
            }
        } catch (final EOFException | FileNotFoundException ignored) {
            // assume file aged off (or there's no data in file, in case of EOFException, which indicates that data was cached
            // in operating system and entire O/S crashed and always.sync was not turned on.)
        } catch (final IOException ioe) {
            logger.error("Failed to read Provenance Event File {} due to {}", eventFile.toFile(), ioe.toString());
            logger.error("", ioe);
            eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to read Provenance Event File " + eventFile.toFile() + " due to " + ioe.toString());
        }

        if (matched.size() >= maxRecords) {
            break;
        }
    }

    if (logger.isDebugEnabled()) {
        logger.debug("Retrieving up to {} records starting at Event ID {}; returning {} events", maxRecords, firstRecordId, matched.size());
    }

    return matched;
}
Aggregations