Use of org.apache.nifi.provenance.toc.TocReader in project nifi by apache.
The class TestSchemaRecordReaderWriter, method testFieldRemovedFromSchema.
@Test
public void testFieldRemovedFromSchema() throws IOException {
    final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);
    try {
        // Create a schema that has the fields modified
        final RecordSchema schemaV1 = ProvenanceEventSchema.PROVENANCE_EVENT_SCHEMA_V1;
        final List<RecordField> fields = new ArrayList<>(schemaV1.getFields());
        fields.remove(new SimpleRecordField(EventFieldNames.UPDATED_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        fields.remove(new SimpleRecordField(EventFieldNames.PREVIOUS_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE));
        final RecordSchema recordSchema = new RecordSchema(fields);

        // Create a record writer whose schema does not contain updated attributes or previous attributes.
        // This means that we must also override the method that writes out attributes so that we are able
        // to avoid actually writing them out.
        final ByteArraySchemaRecordWriter writer = new ByteArraySchemaRecordWriter(journalFile, idGenerator, tocWriter, false, 0) {
            @Override
            public void writeHeader(long firstEventId, DataOutputStream out) throws IOException {
                final ByteArrayOutputStream baos = new ByteArrayOutputStream();
                recordSchema.writeTo(baos);
                out.writeInt(baos.size());
                baos.writeTo(out);
            }

            @Override
            protected Record createRecord(final ProvenanceEventRecord event, final long eventId) {
                final RecordSchema contentClaimSchema = new RecordSchema(recordSchema.getField(EventFieldNames.CONTENT_CLAIM).getSubFields());
                return new EventRecord(event, eventId, recordSchema, contentClaimSchema);
            }
        };

        try {
            writer.writeHeader(1L);
            writer.writeRecord(createEvent());
            writer.writeRecord(createEvent());
        } finally {
            writer.close();
        }
    } finally {
        tocWriter.close();
    }

    // Read the records in and make sure that they have the info that we expect.
    try (final InputStream in = new FileInputStream(journalFile);
         final TocReader tocReader = new StandardTocReader(tocFile);
         final RecordReader reader = createReader(in, journalFile.getName(), tocReader, 10000)) {

        for (int i = 0; i < 2; i++) {
            final StandardProvenanceEventRecord event = reader.nextRecord();
            assertNotNull(event);
            assertEquals(ProvenanceEventType.RECEIVE, event.getEventType());

            // We will still have a Map<String, String> for updated attributes because the
            // Provenance Event Builder will create an empty map.
            assertNotNull(event.getUpdatedAttributes());
            assertTrue(event.getUpdatedAttributes().isEmpty());
        }
    }
}
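For reference, the reader side recovers the schema exactly the way the overridden writeHeader above stored it: a four-byte length prefix followed by the serialized schema. A minimal sketch of that decoding, assuming RecordSchema.readFrom mirrors the writeTo(...) call used in the test:

// Sketch: how a schema-based reader could recover the header schema, assuming the
// header layout written above (an int length followed by the serialized RecordSchema).
private RecordSchema readHeaderSchema(final DataInputStream dis) throws IOException {
    final int schemaLength = dis.readInt(); // length prefix written by writeHeader()
    final byte[] schemaBytes = new byte[schemaLength];
    dis.readFully(schemaBytes); // the serialized RecordSchema bytes
    return RecordSchema.readFrom(new ByteArrayInputStream(schemaBytes));
}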
Use of org.apache.nifi.provenance.toc.TocReader in project nifi by apache.
The class TestSchemaRecordReaderWriter, method testAddOneRecordReadTwice.
@Test
public void testAddOneRecordReadTwice() throws IOException {
    final RecordField unitTestField = new SimpleRecordField("Unit Test Field", FieldType.STRING, Repetition.EXACTLY_ONE);
    final Consumer<List<RecordField>> schemaModifier = fields -> fields.add(unitTestField);

    final Map<RecordField, Object> toAdd = new HashMap<>();
    toAdd.put(unitTestField, "hello");

    try (final ByteArraySchemaRecordWriter writer = createSchemaWriter(schemaModifier, toAdd)) {
        writer.writeHeader(1L);
        writer.writeRecord(createEvent());
    }

    try (final InputStream in = new FileInputStream(journalFile);
         final TocReader tocReader = new StandardTocReader(tocFile);
         final RecordReader reader = createReader(in, journalFile.getName(), tocReader, 10000)) {

        final ProvenanceEventRecord firstEvent = reader.nextRecord();
        assertNotNull(firstEvent);

        final ProvenanceEventRecord secondEvent = reader.nextRecord();
        assertNull(secondEvent);
    }
}
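The second nextRecord() call returning null is the general end-of-journal contract, which makes the canonical drain loop straightforward. A sketch reusing the helpers from this test class:

try (final InputStream in = new FileInputStream(journalFile);
     final TocReader tocReader = new StandardTocReader(tocFile);
     final RecordReader reader = createReader(in, journalFile.getName(), tocReader, 10000)) {
    ProvenanceEventRecord event;
    while ((event = reader.nextRecord()) != null) {
        // process each event; nextRecord() returns null once the journal is exhausted
        System.out.println(event.getEventId());
    }
}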
Use of org.apache.nifi.provenance.toc.TocReader in project nifi by apache.
The class DocsReader, method getByteOffset.
private long getByteOffset(final Document d, final RecordReader reader) {
    final IndexableField blockField = d.getField(FieldNames.BLOCK_INDEX);
    if (blockField != null) {
        final int blockIndex = blockField.numericValue().intValue();
        final TocReader tocReader = reader.getTocReader();
        return tocReader.getBlockOffset(blockIndex);
    }

    return d.getField(FieldNames.STORAGE_FILE_OFFSET).numericValue().longValue();
}
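A caller would typically seek to this offset and then deserialize the event stored there. A sketch of that pattern, assuming RecordReader.skipTo(long) for positioning (the getRecord name is illustrative):

// Sketch: position the reader at the offset resolved from the Lucene document,
// then read the single event stored there.
private ProvenanceEventRecord getRecord(final Document d, final RecordReader reader) throws IOException {
    final long byteOffset = getByteOffset(d, reader);
    reader.skipTo(byteOffset); // seek to the start of the event within the journal
    return reader.nextRecord(); // deserialize the event at that position
}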
Use of org.apache.nifi.provenance.toc.TocReader in project nifi by apache.
The class RecordReaders, method newRecordReader.
/**
 * Creates a new Record Reader that is capable of reading Provenance Event Journals
 *
 * @param file the Provenance Event Journal to read data from
 * @param provenanceLogFiles collection of all provenance journal files
 * @param maxAttributeChars the maximum number of characters to retrieve for any one attribute. This allows us to avoid
 *            issues where a FlowFile has an extremely large attribute and reading events for that FlowFile
 *            results in loading that attribute into memory many times, exhausting the Java Heap
 * @return a Record Reader capable of reading Provenance Event Journals
 * @throws IOException if unable to create a Record Reader for the given file
 */
public static RecordReader newRecordReader(File file, final Collection<Path> provenanceLogFiles, final int maxAttributeChars) throws IOException {
    final File originalFile = file;
    InputStream fis = null;

    try {
        if (!file.exists()) {
            if (provenanceLogFiles != null) {
                final String baseName = LuceneUtil.substringBefore(file.getName(), ".") + ".";
                for (final Path path : provenanceLogFiles) {
                    if (path.toFile().getName().startsWith(baseName)) {
                        file = path.toFile();
                        break;
                    }
                }
            }
        }

        if (file.exists()) {
            try {
                fis = new FileInputStream(file);
            } catch (final FileNotFoundException fnfe) {
                fis = null;
            }
        }

        String filename = file.getName();
        openStream: while (fis == null) {
            final File dir = file.getParentFile();
            final String baseName = LuceneUtil.substringBefore(file.getName(), ".prov");

            // Depending on which rollover actions have occurred, the journal may have been
            // compressed by the time that we are querying the data, so try both the
            // compressed and uncompressed extensions.
            for (final String extension : new String[] { ".prov.gz", ".prov" }) {
                file = new File(dir, baseName + extension);
                if (file.exists()) {
                    try {
                        fis = new FileInputStream(file);
                        filename = baseName + extension;
                        break openStream;
                    } catch (final FileNotFoundException fnfe) {
                        // file was modified by a RolloverAction after we verified that it exists but before
                        // we could create an InputStream for it. Start over.
                        fis = null;
                        continue openStream;
                    }
                }
            }

            break;
        }

        if (fis == null) {
            throw new FileNotFoundException("Unable to locate file " + originalFile);
        }

        final File tocFile = TocUtil.getTocFile(file);
        final InputStream bufferedInStream = new BufferedInputStream(fis);

        final String serializationName;
        try {
            bufferedInStream.mark(4096);
            final InputStream in = filename.endsWith(".gz") ? new GZIPInputStream(bufferedInStream) : bufferedInStream;
            final DataInputStream dis = new DataInputStream(in);
            serializationName = dis.readUTF();
            bufferedInStream.reset();
        } catch (final EOFException eof) {
            fis.close();
            return new EmptyRecordReader();
        }

        switch (serializationName) {
            case StandardRecordReader.SERIALIZATION_NAME: {
                if (tocFile.exists()) {
                    final TocReader tocReader = new StandardTocReader(tocFile);
                    return new StandardRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars);
                } else {
                    return new StandardRecordReader(bufferedInStream, filename, maxAttributeChars);
                }
            }
            case ByteArraySchemaRecordWriter.SERIALIZATION_NAME: {
                if (tocFile.exists()) {
                    final TocReader tocReader = new StandardTocReader(tocFile);
                    return new ByteArraySchemaRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars);
                } else {
                    return new ByteArraySchemaRecordReader(bufferedInStream, filename, maxAttributeChars);
                }
            }
            case EventIdFirstSchemaRecordWriter.SERIALIZATION_NAME: {
                if (!tocFile.exists()) {
                    throw new FileNotFoundException("Cannot create TOC Reader because the file " + tocFile + " does not exist");
                }

                final TocReader tocReader = new StandardTocReader(tocFile);
                return new EventIdFirstSchemaRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars);
            }
            case EncryptedSchemaRecordReader.SERIALIZATION_NAME: {
                if (!tocFile.exists()) {
                    throw new FileNotFoundException("Cannot create TOC Reader because the file " + tocFile + " does not exist");
                }

                if (!isEncryptionAvailable()) {
                    throw new IOException("Cannot read encrypted repository because this reader is not configured for encryption");
                }

                final TocReader tocReader = new StandardTocReader(tocFile);
                // Return a reader with no eventEncryptor because this method contract cannot change,
                // then inject the encryptor from the writer in the calling method
                return new EncryptedSchemaRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars, null);
            }
            default: {
                throw new IOException("Unable to read data from file " + file + " because the file was written using an unknown Serializer: " + serializationName);
            }
        }
    } catch (final IOException ioe) {
        if (fis != null) {
            try {
                fis.close();
            } catch (final IOException inner) {
                ioe.addSuppressed(inner);
            }
        }

        throw ioe;
    }
}
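Typical use of the factory is a try-with-resources block that lets newRecordReader select the correct reader for whatever serialization the journal was written with. A sketch (the file path is hypothetical; passing null for provenanceLogFiles skips the fallback name lookup shown above):

final File journal = new File("provenance_repository/12345.prov.gz"); // hypothetical path
try (final RecordReader reader = RecordReaders.newRecordReader(journal, null, 65536)) {
    StandardProvenanceEventRecord event;
    while ((event = reader.nextRecord()) != null) {
        System.out.println(event.getEventId() + " " + event.getEventType());
    }
}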
Use of org.apache.nifi.provenance.toc.TocReader in project nifi-minifi by apache.
The class MiNiFiPersistentProvenanceRepository, method getEvents.
@Override
public List<ProvenanceEventRecord> getEvents(final long firstRecordId, final int maxRecords, final NiFiUser user) throws IOException {
    final List<ProvenanceEventRecord> records = new ArrayList<>(maxRecords);

    final List<Path> paths = getPathsForId(firstRecordId);
    if (paths == null || paths.isEmpty()) {
        return records;
    }

    for (final Path path : paths) {
        try (RecordReader reader = RecordReaders.newRecordReader(path.toFile(), getAllLogFiles(), maxAttributeChars)) {
            // For the first file, use the TOC to skip directly to the block that contains the first
            // record of interest, rather than reading through a lot of data that we don't care about
            // just to get to the first record that we want.
            if (records.isEmpty()) {
                final TocReader tocReader = reader.getTocReader();
                if (tocReader != null) {
                    final Integer blockIndex = tocReader.getBlockIndexForEventId(firstRecordId);
                    if (blockIndex != null) {
                        reader.skipToBlock(blockIndex);
                    }
                }
            }

            StandardProvenanceEventRecord record;
            while (records.size() < maxRecords && (record = reader.nextRecord()) != null) {
                if (record.getEventId() >= firstRecordId && isAuthorized(record, user)) {
                    records.add(record);
                }
            }
        } catch (final EOFException | FileNotFoundException fnfe) {
            // Assume the file has aged off. An EOFException can also mean the file holds no data:
            // the data was cached by the operating system, the entire O/S crashed, and always.sync
            // was not turned on, so nothing was ever flushed to disk.
        } catch (final IOException ioe) {
            logger.error("Failed to read Provenance Event File {} due to {}", path.toFile(), ioe.toString());
            logger.error("", ioe);
            eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to read Provenance Event File " + path.toFile() + " due to " + ioe.toString());
        }

        if (records.size() >= maxRecords) {
            break;
        }
    }

    if (logger.isDebugEnabled()) {
        logger.debug("Retrieving up to {} records starting at Event ID {}; returning {} events", maxRecords, firstRecordId, records.size());
    }

    return records;
}
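Calling the method is straightforward. A sketch of paging through the first hundred events (repository is a hypothetical MiNiFiPersistentProvenanceRepository instance, and passing a null user assumes authorization is not being enforced):

final List<ProvenanceEventRecord> page = repository.getEvents(0L, 100, null);
for (final ProvenanceEventRecord event : page) {
    System.out.println(event.getEventId() + " " + event.getEventType());
}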