Search in sources :

Example 1 with LimitingInputStream

use of org.apache.nifi.stream.io.LimitingInputStream in project nifi by apache.

the class CompressableRecordReader method resetStreamForNextBlock.

/**
 * Rebuilds the stream stack ({@code byteCountingIn} and {@code dis}) on top of
 * {@code rawInputStream} so that subsequent reads are positioned for the next block.
 * <p>
 * When a TOC reader is available and reports a non-negative offset for the block after
 * the current one, reads are capped so they cannot run past that offset. Otherwise the
 * raw stream is used without a limit.
 *
 * @throws IOException if the TOC lookup or the construction of the GZIP stream fails
 */
private void resetStreamForNextBlock() throws IOException {
    // Default to the unbounded raw stream; narrow it only when the TOC knows where the next block begins.
    InputStream blockSource = rawInputStream;
    if (tocReader != null) {
        final long nextBlockOffset = tocReader.getBlockOffset(getBlockIndex() + 1);
        if (nextBlockOffset >= 0) {
            final long bytesUntilNextBlock = nextBlockOffset - rawInputStream.getBytesConsumed();
            blockSource = new LimitingInputStream(rawInputStream, bytesUntilNextBlock);
        }
    }

    // Optionally layer GZIP decoding beneath the buffer.
    final InputStream decodedSource = compressed ? new GZIPInputStream(blockSource) : blockSource;
    final InputStream bufferedSource = new BufferedInputStream(decodedSource);

    // The byte counter is seeded with the number of bytes consumed from the raw stream at this point.
    byteCountingIn = new ByteCountingInputStream(bufferedSource, rawInputStream.getBytesConsumed());
    dis = new DataInputStream(byteCountingIn);
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) BufferedInputStream(java.io.BufferedInputStream) DataInputStream(java.io.DataInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedInputStream(java.io.BufferedInputStream) LimitingInputStream(org.apache.nifi.stream.io.LimitingInputStream) ByteCountingInputStream(org.apache.nifi.stream.io.ByteCountingInputStream) InputStream(java.io.InputStream) ByteCountingInputStream(org.apache.nifi.stream.io.ByteCountingInputStream) LimitingInputStream(org.apache.nifi.stream.io.LimitingInputStream) DataInputStream(java.io.DataInputStream)

Example 2 with LimitingInputStream

use of org.apache.nifi.stream.io.LimitingInputStream in project nifi by apache.

the class EncryptedSchemaRecordReader method readRecord.

/**
 * Reads one encrypted record of {@code recordLength} bytes from the stream, decrypts it,
 * and deserializes it into a provenance event carrying the given event ID.
 *
 * @param inputStream  stream positioned at the start of the encrypted record
 * @param eventId      ID to assign to the event; its string form is also passed to {@code decrypt}
 * @param startOffset  offset value forwarded to {@code LookupTableEventRecord.getEvent}
 * @param recordLength number of encrypted bytes to read
 * @return the deserialized event, or {@code null} if the record reader produced no record
 * @throws IOException if reading fails, or wrapping an {@code EncryptionException} raised during decryption
 */
private StandardProvenanceEventRecord readRecord(final DataInputStream inputStream, final long eventId, final long startOffset, final int recordLength) throws IOException {
    try {
        // Cap reads at the record length, then pull the whole ciphertext into memory.
        final InputStream boundedIn = new LimitingInputStream(inputStream, recordLength);
        final byte[] cipherBytes = new byte[recordLength];
        new DataInputStream(boundedIn).readFully(cipherBytes);

        final byte[] plainBytes = decrypt(cipherBytes, Long.toString(eventId));
        final InputStream plainIn = new ByteArrayInputStream(plainBytes);

        final Record eventRecord = getRecordReader().readRecord(plainIn);
        if (eventRecord != null) {
            final StandardProvenanceEventRecord event = LookupTableEventRecord.getEvent(
                    eventRecord,
                    getFilename(),
                    startOffset,
                    getMaxAttributeLength(),
                    getFirstEventId(),
                    getSystemTimeOffset(),
                    getComponentIds(),
                    getComponentTypes(),
                    getQueueIds(),
                    getEventTypes());
            event.setEventId(eventId);
            return event;
        }
        return null;
    } catch (EncryptionException failure) {
        logger.error("Encountered an error reading the record: ", failure);
        throw new IOException(failure);
    }
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) DataInputStream(java.io.DataInputStream) LimitingInputStream(org.apache.nifi.stream.io.LimitingInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) LimitingInputStream(org.apache.nifi.stream.io.LimitingInputStream) Record(org.apache.nifi.repository.schema.Record) LookupTableEventRecord(org.apache.nifi.provenance.schema.LookupTableEventRecord) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream)

Example 3 with LimitingInputStream

use of org.apache.nifi.stream.io.LimitingInputStream in project nifi by apache.

the class LengthDelimitedJournal method validateHeader.

/**
 * Reads and validates the journal header from the given stream, then lets the SerDe consume
 * its own header.
 * <p>
 * The fields are read in this order: journal class name (UTF), journal encoding version (int),
 * SerDe class name (UTF), SerDe version (int), SerDe header length (int), followed by the
 * SerDe header bytes.
 *
 * @param in stream positioned at the start of the journal file
 * @return the SerDe created for the class name recorded in the header, paired with the SerDe
 *         version recorded in the header
 * @throws IOException if the header cannot be read, names a different journal class, uses a
 *         journal or SerDe version newer than this code supports, or names a SerDe that the
 *         factory cannot create
 */
private synchronized SerDeAndVersion validateHeader(final DataInputStream in) throws IOException {
    final String journalClassName = in.readUTF();
    logger.debug("Write Ahead Log Class Name for {} is {}", journalFile, journalClassName);
    if (!LengthDelimitedJournal.class.getName().equals(journalClassName)) {
        throw new IOException("Invalid header information - " + journalFile + " does not appear to be a valid journal file.");
    }

    final int encodingVersion = in.readInt();
    logger.debug("Encoding version for {} is {}", journalFile, encodingVersion);
    if (encodingVersion > JOURNAL_ENCODING_VERSION) {
        throw new IOException("Cannot read journal file " + journalFile + " because it is encoded using version " + encodingVersion
            + " but this version of the code only understands version " + JOURNAL_ENCODING_VERSION + " and below");
    }

    final String serdeClassName = in.readUTF();
    logger.debug("Serde Class Name for {} is {}", journalFile, serdeClassName);
    final SerDe<T> serde;
    try {
        serde = serdeFactory.createSerDe(serdeClassName);
    } catch (final IllegalArgumentException iae) {
        // Keep the factory's exception as the cause so the underlying reason is not lost.
        throw new IOException("Cannot read journal file " + journalFile + " because the serializer/deserializer used was " + serdeClassName
            + " but this repository is configured to use a different type of serializer/deserializer", iae);
    }

    final int serdeVersion = in.readInt();
    logger.debug("Serde version is {}", serdeVersion);
    if (serdeVersion > serde.getVersion()) {
        // Report the SerDe version that failed the check, not the journal encoding version.
        throw new IOException("Cannot read journal file " + journalFile + " because it is encoded using version " + serdeVersion
            + " of the serializer/deserializer but this version of the code only understands version " + serde.getVersion() + " and below");
    }

    // Cap the SerDe's header read at the length recorded in the journal so it cannot read past its header.
    final int serdeHeaderLength = in.readInt();
    final InputStream serdeHeaderIn = new LimitingInputStream(in, serdeHeaderLength);
    final DataInputStream dis = new DataInputStream(serdeHeaderIn);
    serde.readHeader(dis);

    return new SerDeAndVersion(serde, serdeVersion);
}
Also used : DataInputStream(java.io.DataInputStream) BufferedInputStream(java.io.BufferedInputStream) LimitingInputStream(org.apache.nifi.stream.io.LimitingInputStream) ByteCountingInputStream(org.apache.nifi.stream.io.ByteCountingInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) LimitingInputStream(org.apache.nifi.stream.io.LimitingInputStream) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream)

Example 4 with LimitingInputStream

use of org.apache.nifi.stream.io.LimitingInputStream in project nifi by apache.

the class LengthDelimitedJournal method recoverRecords.

/**
 * Replays the transactions stored in the journal file into the supplied record map and set of
 * swap locations.
 * <p>
 * Each transaction is first accumulated in temporary collections and applied to
 * {@code recordMap} and {@code swapLocations} only after the whole transaction has been read,
 * so a transaction that is cut short is not partially applied.
 * <p>
 * An {@link EOFException} is logged and recorded in the result rather than propagated. Any other
 * exception is logged and suppressed when {@code remainingBytesAllNul} reports true for the
 * rest of the stream; otherwise it is re-thrown.
 *
 * @param recordMap     map of record identifier to record; updated in place
 * @param swapLocations set of swap locations; updated in place
 * @return a {@code StandardJournalRecovery} holding the number of updates applied, the highest
 *         transaction ID read (-1 if none), and whether an unexpected end-of-file was encountered
 * @throws IOException if the journal cannot be opened or read, its header is invalid, or an
 *         unexpected transaction indicator is found and the remaining bytes are not all NUL
 */
@Override
public JournalRecovery recoverRecords(final Map<Object, T> recordMap, final Set<String> swapLocations) throws IOException {
    long maxTransactionId = -1L;
    int updateCount = 0;
    boolean eofException = false;
    logger.info("Recovering records from journal {}", journalFile);
    // Held as a double so the progress percentage below is computed with floating-point division.
    final double journalLength = journalFile.length();
    try (final InputStream fis = new FileInputStream(journalFile);
        final InputStream bufferedIn = new BufferedInputStream(fis);
        final ByteCountingInputStream byteCountingIn = new ByteCountingInputStream(bufferedIn);
        final DataInputStream in = new DataInputStream(byteCountingIn)) {
        try {
            // Validate that the header is what we expect and obtain the appropriate SerDe and Version information
            final SerDeAndVersion serdeAndVersion = validateHeader(in);
            final SerDe<T> serde = serdeAndVersion.getSerDe();
            // Ensure that we get a valid transaction indicator
            int transactionIndicator = in.read();
            if (transactionIndicator != TRANSACTION_FOLLOWS && transactionIndicator != JOURNAL_COMPLETE && transactionIndicator != -1) {
                throw new IOException("After reading " + byteCountingIn.getBytesConsumed() + " bytes from " + journalFile + ", encountered unexpected value of " + transactionIndicator + " for the Transaction Indicator. This journal may have been corrupted.");
            }
            // Number of bytes that had been consumed when progress was last logged
            long consumedAtLog = 0L;
            // We don't want to apply the updates in a transaction until we've finished recovering the entire
            // transaction. Otherwise, we could apply say 8 out of 10 updates and then hit an EOF. In such a case,
            // we want to rollback the entire transaction. We handle this by not updating recordMap or swapLocations
            // variables directly but instead keeping track of the things that occurred and then once we've read the
            // entire transaction, we can apply those updates to the recordMap and swapLocations.
            final Map<Object, T> transactionRecordMap = new HashMap<>();
            final Set<Object> idsRemoved = new HashSet<>();
            final Set<String> swapLocationsRemoved = new HashSet<>();
            final Set<String> swapLocationsAdded = new HashSet<>();
            int transactionUpdates = 0;
            // While we have a transaction to recover, recover it
            while (transactionIndicator == TRANSACTION_FOLLOWS) {
                // Reset the per-transaction state before reading the next transaction
                transactionRecordMap.clear();
                idsRemoved.clear();
                swapLocationsRemoved.clear();
                swapLocationsAdded.clear();
                transactionUpdates = 0;
                // Format is <Transaction ID: 8 bytes> <Transaction Length: 4 bytes> <Transaction data: # of bytes indicated by Transaction Length Field>
                final long transactionId = in.readLong();
                maxTransactionId = Math.max(maxTransactionId, transactionId);
                final int transactionLength = in.readInt();
                // Use SerDe to deserialize the update. We use a LimitingInputStream to ensure that the SerDe is not able to read past its intended
                // length, in case there is a bug in the SerDe. We then use a ByteCountingInputStream so that we know how much of the transaction
                // has been read and keep deserializing edits until all of it has been consumed.
                final InputStream transactionLimitingIn = new LimitingInputStream(in, transactionLength);
                final ByteCountingInputStream transactionByteCountingIn = new ByteCountingInputStream(transactionLimitingIn);
                final DataInputStream transactionDis = new DataInputStream(transactionByteCountingIn);
                while (transactionByteCountingIn.getBytesConsumed() < transactionLength) {
                    final T record = serde.deserializeEdit(transactionDis, recordMap, serdeAndVersion.getVersion());
                    // Update our RecordMap so that we have the most up-to-date version of the Record.
                    final Object recordId = serde.getRecordIdentifier(record);
                    final UpdateType updateType = serde.getUpdateType(record);
                    switch(updateType) {
                        case DELETE:
                            {
                                idsRemoved.add(recordId);
                                transactionRecordMap.remove(recordId);
                                break;
                            }
                        case SWAP_IN:
                            {
                                final String location = serde.getLocation(record);
                                if (location == null) {
                                    logger.error("Recovered SWAP_IN record from edit log, but it did not contain a Location; skipping record");
                                } else {
                                    swapLocationsRemoved.add(location);
                                    swapLocationsAdded.remove(location);
                                    transactionRecordMap.put(recordId, record);
                                }
                                break;
                            }
                        case SWAP_OUT:
                            {
                                final String location = serde.getLocation(record);
                                if (location == null) {
                                    logger.error("Recovered SWAP_OUT record from edit log, but it did not contain a Location; skipping record");
                                } else {
                                    swapLocationsRemoved.remove(location);
                                    swapLocationsAdded.add(location);
                                    idsRemoved.add(recordId);
                                    transactionRecordMap.remove(recordId);
                                }
                                break;
                            }
                        default:
                            {
                                transactionRecordMap.put(recordId, record);
                                idsRemoved.remove(recordId);
                                break;
                            }
                    }
                    transactionUpdates++;
                }
                // Apply the transaction
                for (final Object id : idsRemoved) {
                    recordMap.remove(id);
                }
                recordMap.putAll(transactionRecordMap);
                swapLocations.removeAll(swapLocationsRemoved);
                swapLocations.addAll(swapLocationsAdded);
                updateCount += transactionUpdates;
                // Check if there is another transaction to read
                transactionIndicator = in.read();
                if (transactionIndicator != TRANSACTION_FOLLOWS && transactionIndicator != JOURNAL_COMPLETE && transactionIndicator != -1) {
                    throw new IOException("After reading " + byteCountingIn.getBytesConsumed() + " bytes from " + journalFile + ", encountered unexpected value of " + transactionIndicator + " for the Transaction Indicator. This journal may have been corrupted.");
                }
                // If we have a very large journal (for instance, if checkpoint is not called for a long time, or if there is a problem rolling over
                // the journal), then we want to occasionally notify the user that we are, in fact, making progress, so that it doesn't appear that
                // NiFi has become "stuck".
                final long consumed = byteCountingIn.getBytesConsumed();
                if (consumed - consumedAtLog > 50_000_000) {
                    final double percentage = consumed / journalLength * 100D;
                    final String pct = new DecimalFormat("#.00").format(percentage);
                    logger.info("{}% of the way finished recovering journal {}, having recovered {} updates", pct, journalFile, updateCount);
                    consumedAtLog = consumed;
                }
            }
        } catch (final EOFException eof) {
            eofException = true;
            logger.warn("Encountered unexpected End-of-File when reading journal file {}; assuming that NiFi was shutdown unexpectedly and continuing recovery", journalFile);
        } catch (final Exception e) {
            // If everything left in the journal is NUL bytes, log the failure and skip the remainder of the file.
            // Otherwise, there is not much that we can do but to re-throw the Exception.
            if (remainingBytesAllNul(in)) {
                logger.warn("Failed to recover some of the data from Write-Ahead Log Journal because encountered trailing NUL bytes. " + "This will sometimes happen after a sudden power loss. The rest of this journal file will be skipped for recovery purposes. " + "The following Exception was encountered while recovering the updates to the journal:", e);
            } else {
                throw e;
            }
        }
    }
    logger.info("Successfully recovered {} updates from journal {}", updateCount, journalFile);
    return new StandardJournalRecovery(updateCount, maxTransactionId, eofException);
}
Also used : HashMap(java.util.HashMap) DecimalFormat(java.text.DecimalFormat) ByteCountingInputStream(org.apache.nifi.stream.io.ByteCountingInputStream) BufferedInputStream(java.io.BufferedInputStream) EOFException(java.io.EOFException) HashSet(java.util.HashSet) DataInputStream(java.io.DataInputStream) BufferedInputStream(java.io.BufferedInputStream) LimitingInputStream(org.apache.nifi.stream.io.LimitingInputStream) ByteCountingInputStream(org.apache.nifi.stream.io.ByteCountingInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) LimitingInputStream(org.apache.nifi.stream.io.LimitingInputStream) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) UpdateType(org.wali.UpdateType) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) EOFException(java.io.EOFException) FileNotFoundException(java.io.FileNotFoundException)

Example 5 with LimitingInputStream

use of org.apache.nifi.stream.io.LimitingInputStream in project nifi by apache.

the class FlowController method getContent.

/**
 * Opens the content of the given FlowFile for download and registers a {@code DOWNLOAD}
 * provenance event for the request.
 * <p>
 * If the FlowFile has no content claim, an empty stream is returned. Otherwise the claim is
 * read from the content repository, positioned at the FlowFile's content claim offset, and
 * limited to the FlowFile's size.
 * <p>
 * If positioning the stream or registering the provenance event fails, the opened stream is
 * closed before the exception propagates. On success the caller receives the open stream.
 *
 * @param flowFile   the FlowFile whose content is requested; must not be null
 * @param requestor  identity of the requestor, recorded in the event details; must not be null
 * @param requestUri URI recorded as the event's transit URI; must not be null
 * @return a stream over the FlowFile's content
 * @throws IOException if the content cannot be read or positioned
 */
public InputStream getContent(final FlowFileRecord flowFile, final String requestor, final String requestUri) throws IOException {
    requireNonNull(flowFile);
    requireNonNull(requestor);
    requireNonNull(requestUri);

    InputStream stream;
    final ResourceClaim resourceClaim;
    final ContentClaim contentClaim = flowFile.getContentClaim();
    if (contentClaim == null) {
        resourceClaim = null;
        stream = new ByteArrayInputStream(new byte[0]);
    } else {
        resourceClaim = contentClaim.getResourceClaim();
        stream = contentRepository.read(contentClaim);
    }

    try {
        if (contentClaim != null) {
            final long contentClaimOffset = flowFile.getContentClaimOffset();
            if (contentClaimOffset > 0L) {
                StreamUtils.skip(stream, contentClaimOffset);
            }
            // Limit reads to this FlowFile's size.
            stream = new LimitingInputStream(stream, flowFile.getSize());
        }

        // Register a Provenance Event to indicate that the content was downloaded.
        final StandardProvenanceEventRecord.Builder sendEventBuilder = new StandardProvenanceEventRecord.Builder()
            .setEventType(ProvenanceEventType.DOWNLOAD)
            .setFlowFileUUID(flowFile.getAttribute(CoreAttributes.UUID.key()))
            .setAttributes(flowFile.getAttributes(), Collections.emptyMap())
            .setTransitUri(requestUri)
            .setEventTime(System.currentTimeMillis())
            .setFlowFileEntryDate(flowFile.getEntryDate())
            .setLineageStartDate(flowFile.getLineageStartDate())
            .setComponentType(getName())
            .setComponentId(getRootGroupId())
            .setDetails("Download of Content requested by " + requestor + " for " + flowFile);

        if (contentClaim != null) {
            sendEventBuilder.setCurrentContentClaim(resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(),
                contentClaim.getOffset() + flowFile.getContentClaimOffset(), flowFile.getSize());
        }

        final ProvenanceEventRecord sendEvent = sendEventBuilder.build();
        provenanceRepository.registerEvent(sendEvent);
        return stream;
    } catch (final IOException | RuntimeException e) {
        // The caller never receives the stream on this path, so release it here to avoid a leak.
        try {
            stream.close();
        } catch (final IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
Also used : StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) StandardContentClaim(org.apache.nifi.controller.repository.claim.StandardContentClaim) ContentClaim(org.apache.nifi.controller.repository.claim.ContentClaim) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) LimitingInputStream(org.apache.nifi.stream.io.LimitingInputStream) LimitedInputStream(org.apache.nifi.controller.repository.io.LimitedInputStream) InputStream(java.io.InputStream) StandardProvenanceEventRecord(org.apache.nifi.provenance.StandardProvenanceEventRecord) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) ResourceClaim(org.apache.nifi.controller.repository.claim.ResourceClaim) LimitingInputStream(org.apache.nifi.stream.io.LimitingInputStream)

Aggregations

LimitingInputStream (org.apache.nifi.stream.io.LimitingInputStream)8 InputStream (java.io.InputStream)7 DataInputStream (java.io.DataInputStream)6 ByteArrayInputStream (java.io.ByteArrayInputStream)4 IOException (java.io.IOException)4 BufferedInputStream (java.io.BufferedInputStream)3 Record (org.apache.nifi.repository.schema.Record)3 ByteCountingInputStream (org.apache.nifi.stream.io.ByteCountingInputStream)3 FileInputStream (java.io.FileInputStream)2 LookupTableEventRecord (org.apache.nifi.provenance.schema.LookupTableEventRecord)2 EOFException (java.io.EOFException)1 FileNotFoundException (java.io.FileNotFoundException)1 OutputStream (java.io.OutputStream)1 DecimalFormat (java.text.DecimalFormat)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 ContentClaim (org.apache.nifi.controller.repository.claim.ContentClaim)1 ResourceClaim (org.apache.nifi.controller.repository.claim.ResourceClaim)1