Use of org.apache.nifi.stream.io.LimitingInputStream in the Apache NiFi project: class CompressableRecordReader, method resetStreamForNextBlock.
private void resetStreamForNextBlock() throws IOException {
    final InputStream limitedStream;
    if (tocReader == null) {
        limitedStream = rawInputStream;
    } else {
        final long offset = tocReader.getBlockOffset(1 + getBlockIndex());
        if (offset < 0) {
            limitedStream = rawInputStream;
        } else {
            limitedStream = new LimitingInputStream(rawInputStream, offset - rawInputStream.getBytesConsumed());
        }
    }

    final InputStream readableStream;
    if (compressed) {
        readableStream = new BufferedInputStream(new GZIPInputStream(limitedStream));
    } else {
        readableStream = new BufferedInputStream(limitedStream);
    }

    byteCountingIn = new ByteCountingInputStream(readableStream, rawInputStream.getBytesConsumed());
    dis = new DataInputStream(byteCountingIn);
}
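The pattern above caps the raw stream at the next block's offset before layering GZIP decompression on top, so the decompressor can never read into the following block. A minimal standalone sketch of that pattern, assuming the caller supplies the next block offset and the number of bytes already consumed (the helper name and parameters here are illustrative, not part of NiFi):

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;

import org.apache.nifi.stream.io.LimitingInputStream;

public class BlockBoundedReader {

    /**
     * Wraps the raw stream so that at most (nextBlockOffset - bytesConsumedSoFar) bytes
     * can be read, then layers GZIP decompression on top if the data is compressed.
     * A negative nextBlockOffset means "no known boundary", so the raw stream is used as-is.
     */
    public static InputStream openNextBlock(final InputStream rawIn, final long nextBlockOffset,
            final long bytesConsumedSoFar, final boolean compressed) throws IOException {

        final InputStream limited = (nextBlockOffset < 0)
            ? rawIn
            : new LimitingInputStream(rawIn, nextBlockOffset - bytesConsumedSoFar);

        return compressed
            ? new BufferedInputStream(new GZIPInputStream(limited))
            : new BufferedInputStream(limited);
    }
}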
Use of org.apache.nifi.stream.io.LimitingInputStream in the Apache NiFi project: class EncryptedSchemaRecordReader, method readRecord.
private StandardProvenanceEventRecord readRecord(final DataInputStream inputStream, final long eventId, final long startOffset, final int recordLength) throws IOException {
    try {
        final InputStream limitedIn = new LimitingInputStream(inputStream, recordLength);
        byte[] encryptedSerializedBytes = new byte[recordLength];
        DataInputStream encryptedInputStream = new DataInputStream(limitedIn);
        encryptedInputStream.readFully(encryptedSerializedBytes);

        byte[] plainSerializedBytes = decrypt(encryptedSerializedBytes, Long.toString(eventId));
        InputStream plainStream = new ByteArrayInputStream(plainSerializedBytes);

        final Record eventRecord = getRecordReader().readRecord(plainStream);
        if (eventRecord == null) {
            return null;
        }

        final StandardProvenanceEventRecord deserializedEvent = LookupTableEventRecord.getEvent(eventRecord, getFilename(), startOffset, getMaxAttributeLength(),
            getFirstEventId(), getSystemTimeOffset(), getComponentIds(), getComponentTypes(), getQueueIds(), getEventTypes());
        deserializedEvent.setEventId(eventId);
        return deserializedEvent;
    } catch (EncryptionException e) {
        logger.error("Encountered an error reading the record: ", e);
        throw new IOException(e);
    }
}
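Here the LimitingInputStream bounds a shared DataInputStream to exactly recordLength bytes before the encrypted payload is drained with readFully, so a mistake in decryption or deserialization cannot consume bytes belonging to the next record. A minimal sketch of just that bounding step, with a hypothetical helper name:

import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.nifi.stream.io.LimitingInputStream;

public class FixedLengthRecordReader {

    /**
     * Reads exactly recordLength bytes from the shared stream. The LimitingInputStream
     * guarantees that nothing reading from the returned view can cross the record boundary.
     */
    public static byte[] readRecordBytes(final DataInputStream sharedIn, final int recordLength) throws IOException {
        final InputStream limited = new LimitingInputStream(sharedIn, recordLength);
        final byte[] recordBytes = new byte[recordLength];
        new DataInputStream(limited).readFully(recordBytes);
        return recordBytes;
    }
}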
Use of org.apache.nifi.stream.io.LimitingInputStream in the Apache NiFi project: class LengthDelimitedJournal, method validateHeader.
private synchronized SerDeAndVersion validateHeader(final DataInputStream in) throws IOException {
    final String journalClassName = in.readUTF();
    logger.debug("Write Ahead Log Class Name for {} is {}", journalFile, journalClassName);
    if (!LengthDelimitedJournal.class.getName().equals(journalClassName)) {
        throw new IOException("Invalid header information - " + journalFile + " does not appear to be a valid journal file.");
    }

    final int encodingVersion = in.readInt();
    logger.debug("Encoding version for {} is {}", journalFile, encodingVersion);
    if (encodingVersion > JOURNAL_ENCODING_VERSION) {
        throw new IOException("Cannot read journal file " + journalFile + " because it is encoded using version " + encodingVersion
            + " but this version of the code only understands version " + JOURNAL_ENCODING_VERSION + " and below");
    }

    final String serdeClassName = in.readUTF();
    logger.debug("Serde Class Name for {} is {}", journalFile, serdeClassName);
    final SerDe<T> serde;
    try {
        serde = serdeFactory.createSerDe(serdeClassName);
    } catch (final IllegalArgumentException iae) {
        throw new IOException("Cannot read journal file " + journalFile + " because the serializer/deserializer used was " + serdeClassName
            + " but this repository is configured to use a different type of serializer/deserializer");
    }

    final int serdeVersion = in.readInt();
    logger.debug("Serde version is {}", serdeVersion);
    if (serdeVersion > serde.getVersion()) {
        throw new IOException("Cannot read journal file " + journalFile + " because it is encoded using version " + serdeVersion
            + " of the serializer/deserializer but this version of the code only understands version " + serde.getVersion() + " and below");
    }

    final int serdeHeaderLength = in.readInt();
    final InputStream serdeHeaderIn = new LimitingInputStream(in, serdeHeaderLength);
    final DataInputStream dis = new DataInputStream(serdeHeaderIn);
    serde.readHeader(dis);

    return new SerDeAndVersion(serde, serdeVersion);
}
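The tail of validateHeader relies on a length-prefixed layout: the SerDe header length is written as an int, and the reader hands the SerDe a stream capped at exactly that many bytes, so readHeader cannot overrun into the journal records that follow. A minimal round-trip sketch of that layout (the writer side is an illustrative assumption, not shown in the source above):

import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.nifi.stream.io.LimitingInputStream;

public class LengthPrefixedHeader {

    // Writer side: prefix the header bytes with their length.
    public static byte[] writeHeader(final byte[] headerBytes) throws IOException {
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        final DataOutputStream dos = new DataOutputStream(baos);
        dos.writeInt(headerBytes.length);
        dos.write(headerBytes);
        dos.flush();
        return baos.toByteArray();
    }

    // Reader side: cap the stream at the advertised length so the header consumer
    // cannot read into whatever follows the header.
    public static byte[] readHeader(final DataInputStream in) throws IOException {
        final int headerLength = in.readInt();
        final InputStream bounded = new LimitingInputStream(in, headerLength);
        final byte[] headerBytes = new byte[headerLength];
        new DataInputStream(bounded).readFully(headerBytes);
        return headerBytes;
    }
}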
Use of org.apache.nifi.stream.io.LimitingInputStream in the Apache NiFi project: class LengthDelimitedJournal, method recoverRecords.
@Override
public JournalRecovery recoverRecords(final Map<Object, T> recordMap, final Set<String> swapLocations) throws IOException {
    long maxTransactionId = -1L;
    int updateCount = 0;
    boolean eofException = false;
    logger.info("Recovering records from journal {}", journalFile);
    final double journalLength = journalFile.length();

    try (final InputStream fis = new FileInputStream(journalFile);
        final InputStream bufferedIn = new BufferedInputStream(fis);
        final ByteCountingInputStream byteCountingIn = new ByteCountingInputStream(bufferedIn);
        final DataInputStream in = new DataInputStream(byteCountingIn)) {

        try {
            // Validate that the header is what we expect and obtain the appropriate SerDe and Version information
            final SerDeAndVersion serdeAndVersion = validateHeader(in);
            final SerDe<T> serde = serdeAndVersion.getSerDe();

            // Ensure that we get a valid transaction indicator
            int transactionIndicator = in.read();
            if (transactionIndicator != TRANSACTION_FOLLOWS && transactionIndicator != JOURNAL_COMPLETE && transactionIndicator != -1) {
                throw new IOException("After reading " + byteCountingIn.getBytesConsumed() + " bytes from " + journalFile + ", encountered unexpected value of "
                    + transactionIndicator + " for the Transaction Indicator. This journal may have been corrupted.");
            }

            long consumedAtLog = 0L;

            // We don't want to apply the updates in a transaction until we've finished recovering the entire
            // transaction. Otherwise, we could apply say 8 out of 10 updates and then hit an EOF. In such a case,
            // we want to rollback the entire transaction. We handle this by not updating the recordMap or swapLocations
            // variables directly but instead keeping track of the things that occurred; once we've read the
            // entire transaction, we apply those updates to the recordMap and swapLocations.
            final Map<Object, T> transactionRecordMap = new HashMap<>();
            final Set<Object> idsRemoved = new HashSet<>();
            final Set<String> swapLocationsRemoved = new HashSet<>();
            final Set<String> swapLocationsAdded = new HashSet<>();
            int transactionUpdates = 0;

            // While we have a transaction to recover, recover it
            while (transactionIndicator == TRANSACTION_FOLLOWS) {
                transactionRecordMap.clear();
                idsRemoved.clear();
                swapLocationsRemoved.clear();
                swapLocationsAdded.clear();
                transactionUpdates = 0;

                // Format is <Transaction ID: 8 bytes> <Transaction Length: 4 bytes> <Transaction data: # of bytes indicated by Transaction Length Field>
                final long transactionId = in.readLong();
                maxTransactionId = Math.max(maxTransactionId, transactionId);
                final int transactionLength = in.readInt();

                // Use SerDe to deserialize the update. We use a LimitingInputStream to ensure that the SerDe is not able to read past its intended
                // length, in case there is a bug in the SerDe. We then use a ByteCountingInputStream so that we can ensure that all of the data has
                // been read and throw EOFException otherwise.
                final InputStream transactionLimitingIn = new LimitingInputStream(in, transactionLength);
                final ByteCountingInputStream transactionByteCountingIn = new ByteCountingInputStream(transactionLimitingIn);
                final DataInputStream transactionDis = new DataInputStream(transactionByteCountingIn);

                while (transactionByteCountingIn.getBytesConsumed() < transactionLength) {
                    final T record = serde.deserializeEdit(transactionDis, recordMap, serdeAndVersion.getVersion());

                    // Update our RecordMap so that we have the most up-to-date version of the Record.
                    final Object recordId = serde.getRecordIdentifier(record);
                    final UpdateType updateType = serde.getUpdateType(record);

                    switch (updateType) {
                        case DELETE: {
                            idsRemoved.add(recordId);
                            transactionRecordMap.remove(recordId);
                            break;
                        }
                        case SWAP_IN: {
                            final String location = serde.getLocation(record);
                            if (location == null) {
                                logger.error("Recovered SWAP_IN record from edit log, but it did not contain a Location; skipping record");
                            } else {
                                swapLocationsRemoved.add(location);
                                swapLocationsAdded.remove(location);
                                transactionRecordMap.put(recordId, record);
                            }
                            break;
                        }
                        case SWAP_OUT: {
                            final String location = serde.getLocation(record);
                            if (location == null) {
                                logger.error("Recovered SWAP_OUT record from edit log, but it did not contain a Location; skipping record");
                            } else {
                                swapLocationsRemoved.remove(location);
                                swapLocationsAdded.add(location);
                                idsRemoved.add(recordId);
                                transactionRecordMap.remove(recordId);
                            }
                            break;
                        }
                        default: {
                            transactionRecordMap.put(recordId, record);
                            idsRemoved.remove(recordId);
                            break;
                        }
                    }

                    transactionUpdates++;
                }

                // Apply the transaction
                for (final Object id : idsRemoved) {
                    recordMap.remove(id);
                }
                recordMap.putAll(transactionRecordMap);
                swapLocations.removeAll(swapLocationsRemoved);
                swapLocations.addAll(swapLocationsAdded);
                updateCount += transactionUpdates;

                // Check if there is another transaction to read
                transactionIndicator = in.read();
                if (transactionIndicator != TRANSACTION_FOLLOWS && transactionIndicator != JOURNAL_COMPLETE && transactionIndicator != -1) {
                    throw new IOException("After reading " + byteCountingIn.getBytesConsumed() + " bytes from " + journalFile + ", encountered unexpected value of "
                        + transactionIndicator + " for the Transaction Indicator. This journal may have been corrupted.");
                }

                // If we have a very large journal (for instance, if checkpoint is not called for a long time, or if there is a problem rolling over
                // the journal), then we want to occasionally notify the user that we are, in fact, making progress, so that it doesn't appear that
                // NiFi has become "stuck".
                final long consumed = byteCountingIn.getBytesConsumed();
                if (consumed - consumedAtLog > 50_000_000) {
                    final double percentage = consumed / journalLength * 100D;
                    final String pct = new DecimalFormat("#.00").format(percentage);
                    logger.info("{}% of the way finished recovering journal {}, having recovered {} updates", pct, journalFile, updateCount);
                    consumedAtLog = consumed;
                }
            }
        } catch (final EOFException eof) {
            eofException = true;
            logger.warn("Encountered unexpected End-of-File when reading journal file {}; assuming that NiFi was shutdown unexpectedly and continuing recovery", journalFile);
        } catch (final Exception e) {
            // If the remaining bytes are all NUL, assume a sudden power loss truncated the journal and skip the rest of the file;
            // otherwise, there is not much that we can do but re-throw the Exception.
            if (remainingBytesAllNul(in)) {
                logger.warn("Failed to recover some of the data from Write-Ahead Log Journal because encountered trailing NUL bytes. "
                    + "This will sometimes happen after a sudden power loss. The rest of this journal file will be skipped for recovery purposes. "
                    + "The following Exception was encountered while recovering the updates to the journal:", e);
            } else {
                throw e;
            }
        }
    }

    logger.info("Successfully recovered {} updates from journal {}", updateCount, journalFile);
    return new StandardJournalRecovery(updateCount, maxTransactionId, eofException);
}
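The comment inside recoverRecords spells out why the two wrappers are paired for each transaction: the LimitingInputStream stops a buggy SerDe from reading past the transaction boundary, and the ByteCountingInputStream reveals whether the whole transaction was actually consumed. A minimal sketch of that pairing, assuming ByteCountingInputStream comes from the same org.apache.nifi.stream.io package and using a hypothetical consumer callback:

import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;

import org.apache.nifi.stream.io.ByteCountingInputStream;
import org.apache.nifi.stream.io.LimitingInputStream;

public class BoundedTransactionReader {

    @FunctionalInterface
    public interface TransactionConsumer {
        void consume(DataInputStream transactionIn) throws IOException;
    }

    /**
     * Hands the consumer a stream bounded to transactionLength bytes, then verifies
     * that the consumer read every byte of the transaction.
     */
    public static void readTransaction(final DataInputStream in, final int transactionLength,
            final TransactionConsumer consumer) throws IOException {

        final ByteCountingInputStream counting =
            new ByteCountingInputStream(new LimitingInputStream(in, transactionLength));
        consumer.consume(new DataInputStream(counting));

        if (counting.getBytesConsumed() < transactionLength) {
            throw new EOFException("Expected to consume " + transactionLength + " bytes of the transaction but only "
                + counting.getBytesConsumed() + " were read");
        }
    }
}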
Use of org.apache.nifi.stream.io.LimitingInputStream in the Apache NiFi project: class FlowController, method getContent.
public InputStream getContent(final FlowFileRecord flowFile, final String requestor, final String requestUri) throws IOException {
    requireNonNull(flowFile);
    requireNonNull(requestor);
    requireNonNull(requestUri);

    InputStream stream;
    final ResourceClaim resourceClaim;
    final ContentClaim contentClaim = flowFile.getContentClaim();
    if (contentClaim == null) {
        resourceClaim = null;
        stream = new ByteArrayInputStream(new byte[0]);
    } else {
        resourceClaim = flowFile.getContentClaim().getResourceClaim();
        stream = contentRepository.read(flowFile.getContentClaim());
        final long contentClaimOffset = flowFile.getContentClaimOffset();
        if (contentClaimOffset > 0L) {
            StreamUtils.skip(stream, contentClaimOffset);
        }
        stream = new LimitingInputStream(stream, flowFile.getSize());
    }

    // Register a Provenance Event to indicate that we replayed the data.
    final StandardProvenanceEventRecord.Builder sendEventBuilder = new StandardProvenanceEventRecord.Builder()
        .setEventType(ProvenanceEventType.DOWNLOAD)
        .setFlowFileUUID(flowFile.getAttribute(CoreAttributes.UUID.key()))
        .setAttributes(flowFile.getAttributes(), Collections.emptyMap())
        .setTransitUri(requestUri)
        .setEventTime(System.currentTimeMillis())
        .setFlowFileEntryDate(flowFile.getEntryDate())
        .setLineageStartDate(flowFile.getLineageStartDate())
        .setComponentType(getName())
        .setComponentId(getRootGroupId())
        .setDetails("Download of Content requested by " + requestor + " for " + flowFile);

    if (contentClaim != null) {
        sendEventBuilder.setCurrentContentClaim(resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(),
            contentClaim.getOffset() + flowFile.getContentClaimOffset(), flowFile.getSize());
    }

    final ProvenanceEventRecord sendEvent = sendEventBuilder.build();
    provenanceRepository.registerEvent(sendEvent);
    return stream;
}
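The content-claim branch follows a simple skip-then-limit pattern: advance the claim stream to the FlowFile's offset within the claim, then cap it at the FlowFile's size so the caller only ever sees that FlowFile's bytes. A minimal sketch of just that pattern, assuming StreamUtils comes from the same org.apache.nifi.stream.io package and that the offset and size are supplied by the caller:

import java.io.IOException;
import java.io.InputStream;

import org.apache.nifi.stream.io.LimitingInputStream;
import org.apache.nifi.stream.io.StreamUtils;

public class FlowFileContentSlicer {

    /**
     * Positions the claim stream at the FlowFile's offset within the claim and
     * returns a view that exposes exactly 'size' bytes.
     */
    public static InputStream slice(final InputStream claimStream, final long offset, final long size) throws IOException {
        if (offset > 0L) {
            StreamUtils.skip(claimStream, offset);
        }
        return new LimitingInputStream(claimStream, size);
    }
}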