Search in sources :

Example 11 with TocWriter

use of org.apache.nifi.provenance.toc.TocWriter in project nifi by apache.

the class CompressableRecordWriter method resetWriteStream.

/**
 * Finishes the current block and installs fresh output streams for the next one.
 * <p>
 * Pending output is flushed first. In compressed mode the current stream is then closed and a
 * new GZIP stream is layered on top of the raw stream; otherwise the raw stream is written to
 * directly. When a TOC writer is available and an event id was supplied, the raw stream's
 * current byte count is registered as the offset of the new block.
 *
 * @param eventId the first id that will be written to the new block; if {@code null}, no block
 *                offset is added to the TOC
 * @throws IOException if unable to flush/close the current streams properly; the writer is
 *                     marked dirty before the exception is rethrown
 */
protected void resetWriteStream(final Long eventId) throws IOException {
    try {
        if (out != null) {
            out.flush();
        }

        // Captured right after the flush; used below to seed the byte counter of a new compressed stream.
        final long byteOffset;
        if (byteCountingOut == null) {
            byteOffset = rawOutStream.getBytesWritten();
        } else {
            byteOffset = byteCountingOut.getBytesWritten();
        }

        final TocWriter tocWriter = getTocWriter();

        // No dirty check is needed here: callers are expected to have checked before invoking this method.
        if (compressed && out != null) {
            out.close();
        }

        // The raw byte count is read only after the close above, so the recorded offset reflects
        // whatever the close wrote to the raw stream.
        if (tocWriter != null && eventId != null) {
            tocWriter.addBlockOffset(rawOutStream.getBytesWritten(), eventId);
        }

        if (compressed) {
            // NonCloseableOutputStream presumably shields rawOutStream from being closed with the GZIP wrapper.
            final OutputStream gzipStream = new GZIPOutputStream(new NonCloseableOutputStream(rawOutStream), 1);
            final OutputStream bufferedStream = new BufferedOutputStream(gzipStream, 65536);
            this.byteCountingOut = new ByteCountingOutputStream(bufferedStream, byteOffset);
        } else {
            this.byteCountingOut = rawOutStream;
        }

        this.out = new DataOutputStream(byteCountingOut);
        resetDirtyFlag();
    } catch (final IOException ioe) {
        markDirty();
        throw ioe;
    }
}
Also used : GZIPOutputStream(org.apache.nifi.stream.io.GZIPOutputStream) TocWriter(org.apache.nifi.provenance.toc.TocWriter) DataOutputStream(java.io.DataOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) BufferedOutputStream(java.io.BufferedOutputStream) ByteCountingOutputStream(org.apache.nifi.stream.io.ByteCountingOutputStream) NonCloseableOutputStream(org.apache.nifi.stream.io.NonCloseableOutputStream) DataOutputStream(java.io.DataOutputStream) GZIPOutputStream(org.apache.nifi.stream.io.GZIPOutputStream) IOException(java.io.IOException) ByteCountingOutputStream(org.apache.nifi.stream.io.ByteCountingOutputStream) BufferedOutputStream(java.io.BufferedOutputStream) NonCloseableOutputStream(org.apache.nifi.stream.io.NonCloseableOutputStream)

Example 12 with TocWriter

use of org.apache.nifi.provenance.toc.TocWriter in project nifi by apache.

the class EncryptedSchemaRecordWriter method writeRecord.

/**
 * Serializes and encrypts the given event outside of the lock, then appends the encrypted
 * bytes to the output stream while synchronized on this writer.
 * <p>
 * Each record is written as: the record id offset relative to {@code getFirstEventId()} (int),
 * the length of the encrypted payload (int), and the encrypted payload itself.
 *
 * @param record the provenance event to persist; if its event id is {@code -1} a new id is
 *               taken from the id generator
 * @return a summary built from the record id, the storage location ("parentDir/fileName"),
 *         the current TOC block index ({@code null} when there is no TOC writer), the number
 *         of bytes written for this record, and the byte count after the write
 * @throws IOException if encryption fails (wrapping the {@code EncryptionException}) or if the
 *                     write fails, in which case the writer is marked dirty first
 */
@Override
public StorageSummary writeRecord(final ProvenanceEventRecord record) throws IOException {
    final long encryptStart = System.nanoTime();

    final byte[] cipherBytes;
    try {
        final byte[] plainBytes;
        try (final ByteArrayOutputStream buffer = new ByteArrayOutputStream(256);
            final DataOutputStream bufferOut = new DataOutputStream(buffer)) {
            writeRecord(record, 0L, bufferOut);
            plainBytes = buffer.toByteArray();
        }
        cipherBytes = encrypt(plainBytes, record.getBestEventIdentifier());
    } catch (EncryptionException e) {
        logger.error("Encountered an error: ", e);
        throw new IOException("Error encrypting the provenance record", e);
    }

    final long encryptStop = System.nanoTime();
    final long lockStart = System.nanoTime();

    final long writeStart;
    final long startBytes;
    final long endBytes;
    final long recordIdentifier;
    synchronized (this) {
        writeStart = System.nanoTime();
        try {
            // An event id of -1 means none was assigned yet, so draw the next one from the generator.
            final long suppliedId = record.getEventId();
            if (suppliedId == -1L) {
                recordIdentifier = getIdGenerator().getAndIncrement();
            } else {
                recordIdentifier = suppliedId;
            }

            startBytes = getBytesWritten();
            ensureStreamState(recordIdentifier, startBytes);

            final DataOutputStream stream = getBufferedOutputStream();
            stream.writeInt((int) (recordIdentifier - getFirstEventId()));
            stream.writeInt(cipherBytes.length);
            stream.write(cipherBytes);

            getRecordCount().incrementAndGet();
            endBytes = getBytesWritten();
        } catch (final IOException ioe) {
            markDirty();
            throw ioe;
        }
    }

    final long serializedLength = endBytes - startBytes;

    if (logger.isDebugEnabled()) {
        // Timing stats are only gathered when debug logging is enabled, and are dumped
        // every debugFrequency records.
        final long writeNanos = System.nanoTime() - writeStart;
        final long serializeNanos = lockStart - encryptStart;
        final long encryptNanos = encryptStop - encryptStart;
        final long lockNanos = writeStart - lockStart;

        getWriteTimes().add(new TimestampedLong(writeNanos));
        getSerializeTimes().add(new TimestampedLong(serializeNanos));
        getEncryptTimes().add(new TimestampedLong(encryptNanos));
        getLockTimes().add(new TimestampedLong(lockNanos));
        getBytesWrittenBuffer().add(new TimestampedLong(serializedLength));

        final long totalRecords = getTotalRecordCount().incrementAndGet();
        if (totalRecords % debugFrequency == 0) {
            printStats();
        }
    }

    final TocWriter tocWriter = getTocWriter();
    final Integer blockIndex;
    if (tocWriter == null) {
        blockIndex = null;
    } else {
        blockIndex = tocWriter.getCurrentBlockIndex();
    }

    final File journal = getFile();
    final String storageLocation = journal.getParentFile().getName() + "/" + journal.getName();
    return new StorageSummary(recordIdentifier, storageLocation, blockIndex, serializedLength, endBytes);
}
Also used : DataOutputStream(java.io.DataOutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) TimestampedLong(org.apache.nifi.util.timebuffer.TimestampedLong) StorageSummary(org.apache.nifi.provenance.serialization.StorageSummary) TocWriter(org.apache.nifi.provenance.toc.TocWriter) File(java.io.File)

Example 13 with TocWriter

use of org.apache.nifi.provenance.toc.TocWriter in project nifi by apache.

the class AbstractTestRecordReaderWriter method testMultipleRecordsSameBlockCompressed.

/**
 * Writes ten events to a compressed journal and verifies that every one of them is read back
 * from block 0, whether or not {@code skipToBlock(0)} is called before reading.
 */
@Test
public void testMultipleRecordsSameBlockCompressed() throws IOException {
    final int eventCount = 10;

    final File journalFile = new File("target/storage/" + UUID.randomUUID() + "/testSimpleWrite.gz");
    final File tocFile = TocUtil.getTocFile(journalFile);
    final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);

    // The last argument is presumably the uncompressed block size (1 MB here), large enough
    // that all events land in the same block.
    final RecordWriter writer = createWriter(journalFile, tocWriter, true, 1024 * 1024);
    writer.writeHeader(1L);
    for (int written = 0; written < eventCount; written++) {
        writer.writeRecord(createEvent());
    }
    writer.close();

    final TocReader tocReader = new StandardTocReader(tocFile);
    try (final FileInputStream journalIn = new FileInputStream(journalFile);
        final RecordReader reader = createReader(journalIn, journalFile.getName(), tocReader, 2048)) {
        for (int index = 0; index < eventCount; index++) {
            assertEquals(0, reader.getBlockIndex());

            // Call skipToBlock for the first six records and skip the call for the rest,
            // so both paths are exercised.
            final boolean skipFirst = index < 6;
            if (skipFirst) {
                reader.skipToBlock(0);
            }

            final StandardProvenanceEventRecord recovered = reader.nextRecord();
            assertNotNull(recovered);
            assertEquals("nifi://unit-test", recovered.getTransitUri());
        }

        // Nothing should remain once all written events have been consumed.
        assertNull(reader.nextRecord());
    }

    FileUtils.deleteFile(journalFile.getParentFile(), true);
}
Also used : StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) File(java.io.File) FileInputStream(java.io.FileInputStream) Test(org.junit.Test)

Example 14 with TocWriter

use of org.apache.nifi.provenance.toc.TocWriter in project nifi by apache.

the class AbstractTestRecordReaderWriter method testSingleRecordCompressed.

/**
 * Writes a single event to a compressed journal and verifies that it can be recovered
 * via {@code assertRecoveredRecord}.
 */
@Test
public void testSingleRecordCompressed() throws IOException {
    final File journalFile = new File("target/storage/" + UUID.randomUUID() + "/testSimpleWrite.gz");
    final File tocFile = TocUtil.getTocFile(journalFile);

    // Write the header plus exactly one event, then close so everything reaches disk.
    final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);
    final RecordWriter recordWriter = createWriter(journalFile, tocWriter, true, 8192);
    recordWriter.writeHeader(1L);
    recordWriter.writeRecord(createEvent());
    recordWriter.close();

    // Read it back through the TOC and check the recovered record.
    final TocReader tocReader = new StandardTocReader(tocFile);
    assertRecoveredRecord(journalFile, tocReader, "nifi://unit-test", 0);

    FileUtils.deleteFile(journalFile.getParentFile(), true);
}
Also used : StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) File(java.io.File) Test(org.junit.Test)

Example 15 with TocWriter

use of org.apache.nifi.provenance.toc.TocWriter in project nifi by apache.

the class AbstractTestRecordReaderWriter method testSkipToEvent.

/**
 * Verifies {@code RecordReader.skipToEvent}: each written event can be located by id and is
 * then returned by {@code nextRecord()}, an id beyond the written range yields an empty
 * result, and skipping repeatedly to the same id does not consume the event.
 */
@Test
public void testSkipToEvent() throws IOException {
    final File journalFile = new File("target/storage/" + UUID.randomUUID() + "/testSimpleWrite.gz");
    final File tocFile = TocUtil.getTocFile(journalFile);
    final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false);

    // The last argument (100) is presumably the uncompressed block size; a small value is
    // used so that the events are spread across multiple blocks.
    final RecordWriter writer = createWriter(journalFile, tocWriter, true, 100);
    writer.writeHeader(0L);

    final int numEvents = 10;
    final List<ProvenanceEventRecord> written = new ArrayList<>();
    for (int count = 0; count < numEvents; count++) {
        final ProvenanceEventRecord event = createEvent();
        written.add(event);
        writer.writeRecord(event);
    }
    writer.close();

    final TocReader tocReader = new StandardTocReader(tocFile);

    // Pass 1: skip to every event in order, then consume it.
    try (final FileInputStream journalIn = new FileInputStream(journalFile);
        final RecordReader reader = createReader(journalIn, journalFile.getName(), tocReader, 2048)) {
        for (int id = 0; id < numEvents; id++) {
            final Optional<ProvenanceEventRecord> located = reader.skipToEvent(id);
            assertTrue(located.isPresent());

            final ProvenanceEventRecord found = located.get();
            assertEquals(id, found.getEventId());
            assertEquals(written.get(id), found);

            final StandardProvenanceEventRecord consumed = reader.nextRecord();
            assertEquals(found, consumed);
        }

        // An id past the last written event must not be found.
        assertFalse(reader.skipToEvent(numEvents + 1).isPresent());
    }

    // Pass 2: skipping to the same event several times must keep returning it, and the
    // event must still be available to nextRecord() afterwards.
    try (final FileInputStream journalIn = new FileInputStream(journalFile);
        final RecordReader reader = createReader(journalIn, journalFile.getName(), tocReader, 2048)) {
        final int targetId = 8;
        for (int attempt = 0; attempt < 3; attempt++) {
            final Optional<ProvenanceEventRecord> located = reader.skipToEvent(targetId);
            assertTrue(located.isPresent());
            assertEquals(written.get(targetId), located.get());
        }

        final StandardProvenanceEventRecord consumed = reader.nextRecord();
        assertEquals(written.get(targetId), consumed);
    }
}
Also used : TocReader(org.apache.nifi.provenance.toc.TocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) StandardTocReader(org.apache.nifi.provenance.toc.StandardTocReader) RecordReader(org.apache.nifi.provenance.serialization.RecordReader) ArrayList(java.util.ArrayList) FileInputStream(java.io.FileInputStream) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) RecordWriter(org.apache.nifi.provenance.serialization.RecordWriter) StandardTocWriter(org.apache.nifi.provenance.toc.StandardTocWriter) TocWriter(org.apache.nifi.provenance.toc.TocWriter) File(java.io.File) Test(org.junit.Test)

Aggregations

TocWriter (org.apache.nifi.provenance.toc.TocWriter)23 StandardTocWriter (org.apache.nifi.provenance.toc.StandardTocWriter)18 File (java.io.File)16 Test (org.junit.Test)16 RecordWriter (org.apache.nifi.provenance.serialization.RecordWriter)13 StandardTocReader (org.apache.nifi.provenance.toc.StandardTocReader)12 TocReader (org.apache.nifi.provenance.toc.TocReader)12 FileInputStream (java.io.FileInputStream)10 RecordReader (org.apache.nifi.provenance.serialization.RecordReader)10 IOException (java.io.IOException)8 HashMap (java.util.HashMap)8 DataOutputStream (java.io.DataOutputStream)7 ByteArrayOutputStream (java.io.ByteArrayOutputStream)6 ArrayList (java.util.ArrayList)4 EventReporter (org.apache.nifi.events.EventReporter)4 RecordReaders (org.apache.nifi.provenance.serialization.RecordReaders)4 StorageSummary (org.apache.nifi.provenance.serialization.StorageSummary)4 NopTocWriter (org.apache.nifi.provenance.toc.NopTocWriter)4 TocUtil (org.apache.nifi.provenance.toc.TocUtil)4 OutputStream (java.io.OutputStream)3