Use of org.apache.nifi.stream.io.ByteCountingInputStream in project nifi by apache.
The class CompressableRecordReader, method resetStreamForNextBlock().
private void resetStreamForNextBlock() throws IOException {
    final InputStream limitedStream;
    if (tocReader == null) {
        limitedStream = rawInputStream;
    } else {
        final long offset = tocReader.getBlockOffset(1 + getBlockIndex());
        if (offset < 0) {
            limitedStream = rawInputStream;
        } else {
            limitedStream = new LimitingInputStream(rawInputStream, offset - rawInputStream.getBytesConsumed());
        }
    }

    final InputStream readableStream;
    if (compressed) {
        readableStream = new BufferedInputStream(new GZIPInputStream(limitedStream));
    } else {
        readableStream = new BufferedInputStream(limitedStream);
    }

    byteCountingIn = new ByteCountingInputStream(readableStream, rawInputStream.getBytesConsumed());
    dis = new DataInputStream(byteCountingIn);
}
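The two-argument constructor is what keeps the counts consistent across blocks: the second argument seeds the counter with the bytes already consumed from the raw stream, so getBytesConsumed() continues to report absolute offsets into the underlying file rather than offsets within the current block. A minimal, hypothetical sketch of that behavior (the class name, buffer sizes, and sample data are made up for illustration):

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.nifi.stream.io.ByteCountingInputStream;

public class BlockOffsetExample {
    public static void main(String[] args) throws IOException {
        final byte[] journal = new byte[1024];
        final InputStream raw = new ByteArrayInputStream(journal);

        // Pretend the first 256 bytes (an earlier block) were already consumed from the raw stream.
        final long alreadyConsumed = 256;
        raw.skip(alreadyConsumed);

        // Seeding the counter with the bytes already consumed keeps getBytesConsumed() aligned
        // with positions in the raw stream instead of restarting at zero for this block.
        final ByteCountingInputStream in = new ByteCountingInputStream(raw, alreadyConsumed);
        in.read(new byte[100]);
        System.out.println(in.getBytesConsumed()); // prints 356, an absolute offset into the raw stream
    }
}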
Use of org.apache.nifi.stream.io.ByteCountingInputStream in project nifi by apache.
The class PutSplunk, method processDelimitedMessages().
/**
 * Reads delimited messages from the FlowFile, tracking which messages are sent successfully.
 */
private void processDelimitedMessages(final ProcessContext context, final ProcessSession session, final FlowFile flowFile, final ChannelSender sender, final String delimiter) {
    final String protocol = context.getProperty(PROTOCOL).getValue();
    final byte[] delimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);

    // The NonThreadSafeCircularBuffer allows us to add one byte from the stream at a time and check whether it matches
    // some pattern. We use it here to search for the delimiter as we read through the FlowFile's stream of bytes.
    final NonThreadSafeCircularBuffer buffer = new NonThreadSafeCircularBuffer(delimiterBytes);

    final AtomicLong messagesSent = new AtomicLong(0L);
    final FlowFileMessageBatch messageBatch = new FlowFileMessageBatch(session, flowFile);
    activeBatches.add(messageBatch);

    try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(final InputStream rawIn) throws IOException {
                // contents of a single message
                byte[] data = null;
                boolean streamFinished = false;
                int nextByte;

                try (final InputStream bufferedIn = new BufferedInputStream(rawIn);
                     final ByteCountingInputStream in = new ByteCountingInputStream(bufferedIn)) {

                    long messageStartOffset = in.getBytesConsumed();

                    // read until we're out of data
                    while (!streamFinished) {
                        nextByte = in.read();
                        if (nextByte > -1) {
                            baos.write(nextByte);
                        }

                        if (nextByte == -1) {
                            // we ran out of data; this message is complete
                            data = getMessage(baos, baos.size(), protocol);
                            streamFinished = true;
                        } else if (buffer.addAndCompare((byte) nextByte)) {
                            // we matched our delimiter; this message is complete. We want all of the bytes from the
                            // underlying BAOS except for the last 'delimiterBytes.length' bytes, because we don't want
                            // the delimiter itself to be sent.
                            data = getMessage(baos, baos.size() - delimiterBytes.length, protocol);
                        }

                        if (data != null) {
                            final long messageEndOffset = in.getBytesConsumed();

                            // if the message has no data, ignore it
                            if (data.length != 0) {
                                final long rangeStart = messageStartOffset;
                                try {
                                    sender.send(data);
                                    messageBatch.addSuccessfulRange(rangeStart, messageEndOffset);
                                    messagesSent.incrementAndGet();
                                } catch (final IOException e) {
                                    messageBatch.addFailedRange(rangeStart, messageEndOffset, e);
                                }
                            }

                            // reset the BAOS so that we can start a new message
                            baos.reset();
                            data = null;
                            messageStartOffset = in.getBytesConsumed();
                        }
                    }
                }
            }
        });

        messageBatch.setNumMessages(messagesSent.get());
    } catch (final IOException ioe) {
        // This can be thrown only when closing the ByteArrayOutputStream, at which point we have already
        // completed everything that we need to do, so there is nothing to be done here.
    }
}
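The same offset-tracking idea can be shown without the processor machinery. The sketch below is a simplified, standalone variant that splits on a single newline byte instead of using NonThreadSafeCircularBuffer, and records the byte range each message occupied in the stream; the class name and sample data are illustrative only:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.nifi.stream.io.ByteCountingInputStream;

public class DelimitedOffsetsExample {
    public static void main(String[] args) throws IOException {
        final byte[] content = "alpha\nbeta\ngamma".getBytes(StandardCharsets.UTF_8);
        try (final ByteCountingInputStream in = new ByteCountingInputStream(new ByteArrayInputStream(content));
             final ByteArrayOutputStream baos = new ByteArrayOutputStream()) {

            long messageStart = in.getBytesConsumed();
            int b;
            while ((b = in.read()) != -1) {
                if (b == '\n') {
                    // the range [messageStart, getBytesConsumed()) covers the message plus its delimiter
                    System.out.println(baos.toString("UTF-8") + " -> bytes " + messageStart + ".." + in.getBytesConsumed());
                    baos.reset();
                    messageStart = in.getBytesConsumed();
                } else {
                    baos.write(b);
                }
            }
            if (baos.size() > 0) {
                // final message with no trailing delimiter
                System.out.println(baos.toString("UTF-8") + " -> bytes " + messageStart + ".." + in.getBytesConsumed());
            }
        }
    }
}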
Use of org.apache.nifi.stream.io.ByteCountingInputStream in project nifi by apache.
The class HttpInput, method setInputStream().
public void setInputStream(InputStream inputStream) {
    interruptableIn = new InterruptableInputStream(inputStream);
    this.countingIn = new ByteCountingInputStream(interruptableIn);
}
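A stripped-down sketch of the same pattern, assuming a hypothetical CountingHttpInput class and omitting the InterruptableInputStream wrapping: the counting stream is the one handed out to readers, so the number of bytes consumed over the connection can be reported afterwards.

import java.io.InputStream;
import org.apache.nifi.stream.io.ByteCountingInputStream;

// Hypothetical wrapper modeled on the pattern above, for illustration only.
public class CountingHttpInput {
    private ByteCountingInputStream countingIn;

    public void setInputStream(final InputStream inputStream) {
        this.countingIn = new ByteCountingInputStream(inputStream);
    }

    // callers read through the counting stream, so every byte they consume is tallied
    public InputStream getInputStream() {
        return countingIn;
    }

    public long getBytesRead() {
        return countingIn == null ? 0L : countingIn.getBytesRead();
    }
}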
Use of org.apache.nifi.stream.io.ByteCountingInputStream in project nifi by apache.
The class LengthDelimitedJournal, method recoverRecords().
@Override
public JournalRecovery recoverRecords(final Map<Object, T> recordMap, final Set<String> swapLocations) throws IOException {
    long maxTransactionId = -1L;
    int updateCount = 0;
    boolean eofException = false;
    logger.info("Recovering records from journal {}", journalFile);
    final double journalLength = journalFile.length();

    try (final InputStream fis = new FileInputStream(journalFile);
         final InputStream bufferedIn = new BufferedInputStream(fis);
         final ByteCountingInputStream byteCountingIn = new ByteCountingInputStream(bufferedIn);
         final DataInputStream in = new DataInputStream(byteCountingIn)) {

        try {
            // Validate that the header is what we expect and obtain the appropriate SerDe and Version information
            final SerDeAndVersion serdeAndVersion = validateHeader(in);
            final SerDe<T> serde = serdeAndVersion.getSerDe();

            // Ensure that we get a valid transaction indicator
            int transactionIndicator = in.read();
            if (transactionIndicator != TRANSACTION_FOLLOWS && transactionIndicator != JOURNAL_COMPLETE && transactionIndicator != -1) {
                throw new IOException("After reading " + byteCountingIn.getBytesConsumed() + " bytes from " + journalFile + ", encountered unexpected value of "
                    + transactionIndicator + " for the Transaction Indicator. This journal may have been corrupted.");
            }

            long consumedAtLog = 0L;

            // We don't want to apply the updates in a transaction until we've finished recovering the entire
            // transaction. Otherwise, we could apply, say, 8 out of 10 updates and then hit an EOF. In such a case,
            // we want to roll back the entire transaction. We handle this by not updating the recordMap or swapLocations
            // variables directly, but instead keeping track of the changes that occurred; once we've read the
            // entire transaction, we apply those updates to the recordMap and swapLocations.
            final Map<Object, T> transactionRecordMap = new HashMap<>();
            final Set<Object> idsRemoved = new HashSet<>();
            final Set<String> swapLocationsRemoved = new HashSet<>();
            final Set<String> swapLocationsAdded = new HashSet<>();
            int transactionUpdates = 0;

            // While we have a transaction to recover, recover it
            while (transactionIndicator == TRANSACTION_FOLLOWS) {
                transactionRecordMap.clear();
                idsRemoved.clear();
                swapLocationsRemoved.clear();
                swapLocationsAdded.clear();
                transactionUpdates = 0;

                // Format is <Transaction ID: 8 bytes> <Transaction Length: 4 bytes> <Transaction data: # of bytes indicated by the Transaction Length field>
                final long transactionId = in.readLong();
                maxTransactionId = Math.max(maxTransactionId, transactionId);
                final int transactionLength = in.readInt();

                // Use the SerDe to deserialize the update. We use a LimitingInputStream to ensure that the SerDe is not able to read past its intended
                // length, in case there is a bug in the SerDe. We then use a ByteCountingInputStream so that we can ensure that all of the data has
                // been read, and throw an EOFException otherwise.
                final InputStream transactionLimitingIn = new LimitingInputStream(in, transactionLength);
                final ByteCountingInputStream transactionByteCountingIn = new ByteCountingInputStream(transactionLimitingIn);
                final DataInputStream transactionDis = new DataInputStream(transactionByteCountingIn);

                while (transactionByteCountingIn.getBytesConsumed() < transactionLength) {
                    final T record = serde.deserializeEdit(transactionDis, recordMap, serdeAndVersion.getVersion());

                    // Update our RecordMap so that we have the most up-to-date version of the Record.
                    final Object recordId = serde.getRecordIdentifier(record);
                    final UpdateType updateType = serde.getUpdateType(record);
                    switch (updateType) {
                        case DELETE: {
                            idsRemoved.add(recordId);
                            transactionRecordMap.remove(recordId);
                            break;
                        }
                        case SWAP_IN: {
                            final String location = serde.getLocation(record);
                            if (location == null) {
                                logger.error("Recovered SWAP_IN record from edit log, but it did not contain a Location; skipping record");
                            } else {
                                swapLocationsRemoved.add(location);
                                swapLocationsAdded.remove(location);
                                transactionRecordMap.put(recordId, record);
                            }
                            break;
                        }
                        case SWAP_OUT: {
                            final String location = serde.getLocation(record);
                            if (location == null) {
                                logger.error("Recovered SWAP_OUT record from edit log, but it did not contain a Location; skipping record");
                            } else {
                                swapLocationsRemoved.remove(location);
                                swapLocationsAdded.add(location);
                                idsRemoved.add(recordId);
                                transactionRecordMap.remove(recordId);
                            }
                            break;
                        }
                        default: {
                            transactionRecordMap.put(recordId, record);
                            idsRemoved.remove(recordId);
                            break;
                        }
                    }

                    transactionUpdates++;
                }

                // Apply the transaction
                for (final Object id : idsRemoved) {
                    recordMap.remove(id);
                }
                recordMap.putAll(transactionRecordMap);
                swapLocations.removeAll(swapLocationsRemoved);
                swapLocations.addAll(swapLocationsAdded);
                updateCount += transactionUpdates;

                // Check if there is another transaction to read
                transactionIndicator = in.read();
                if (transactionIndicator != TRANSACTION_FOLLOWS && transactionIndicator != JOURNAL_COMPLETE && transactionIndicator != -1) {
                    throw new IOException("After reading " + byteCountingIn.getBytesConsumed() + " bytes from " + journalFile + ", encountered unexpected value of "
                        + transactionIndicator + " for the Transaction Indicator. This journal may have been corrupted.");
                }

                // If we have a very large journal (for instance, if checkpoint is not called for a long time, or if there is a problem rolling over
                // the journal), then we want to occasionally notify the user that we are, in fact, making progress, so that it doesn't appear that
                // NiFi has become "stuck".
                final long consumed = byteCountingIn.getBytesConsumed();
                if (consumed - consumedAtLog > 50_000_000) {
                    final double percentage = consumed / journalLength * 100D;
                    final String pct = new DecimalFormat("#.00").format(percentage);
                    logger.info("{}% of the way finished recovering journal {}, having recovered {} updates", pct, journalFile, updateCount);
                    consumedAtLog = consumed;
                }
            }
        } catch (final EOFException eof) {
            eofException = true;
            logger.warn("Encountered unexpected End-of-File when reading journal file {}; assuming that NiFi was shut down unexpectedly and continuing recovery", journalFile);
        } catch (final Exception e) {
            // If the remainder of the journal is all NUL bytes, we can safely skip it; otherwise, there is not much that we can do but re-throw the Exception.
            if (remainingBytesAllNul(in)) {
                logger.warn("Failed to recover some of the data from the Write-Ahead Log Journal because trailing NUL bytes were encountered. "
                    + "This can happen after a sudden power loss. The rest of this journal file will be skipped for recovery purposes. "
                    + "The following Exception was encountered while recovering the updates to the journal:", e);
            } else {
                throw e;
            }
        }
    }

    logger.info("Successfully recovered {} updates from journal {}", updateCount, journalFile);
    return new StandardJournalRecovery(updateCount, maxTransactionId, eofException);
}
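The pairing of LimitingInputStream and ByteCountingInputStream used above can be illustrated in isolation. The following is a minimal sketch rather than the journal's actual code: it builds a small length-prefixed record in memory, bounds the read with a LimitingInputStream, and uses getBytesConsumed() to detect a truncated payload; the class name and record layout are assumptions made for the example.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import org.apache.nifi.stream.io.ByteCountingInputStream;
import org.apache.nifi.stream.io.LimitingInputStream;

public class BoundedPayloadExample {
    public static void main(String[] args) throws IOException {
        // Build a tiny length-prefixed record in memory: <length: 4 bytes> <payload>
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        final DataOutputStream dos = new DataOutputStream(out);
        final byte[] payload = "payload-bytes".getBytes(StandardCharsets.UTF_8);
        dos.writeInt(payload.length);
        dos.write(payload);

        final DataInputStream in = new DataInputStream(new ByteArrayInputStream(out.toByteArray()));
        final int length = in.readInt();

        // The LimitingInputStream prevents a buggy reader from consuming past the declared length,
        // and the ByteCountingInputStream lets us verify that the whole payload was actually present.
        final InputStream limited = new LimitingInputStream(in, length);
        final ByteCountingInputStream counting = new ByteCountingInputStream(limited);

        final byte[] recovered = new byte[length];
        int totalRead = 0;
        int n;
        while (totalRead < length && (n = counting.read(recovered, totalRead, length - totalRead)) != -1) {
            totalRead += n;
        }

        if (counting.getBytesConsumed() < length) {
            throw new EOFException("Expected " + length + " bytes but found only " + counting.getBytesConsumed());
        }
        System.out.println(new String(recovered, StandardCharsets.UTF_8));
    }
}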
Use of org.apache.nifi.stream.io.ByteCountingInputStream in project nifi by apache.
The class SocketProtocolListener, method dispatchRequest().
@Override
public void dispatchRequest(final Socket socket) {
    String hostname = null;
    try {
        final StopWatch stopWatch = new StopWatch(true);
        hostname = socket.getInetAddress().getHostName();
        final String requestId = UUID.randomUUID().toString();
        logger.debug("Received request {} from {}", requestId, hostname);

        String requestorDn = getRequestorDN(socket);

        // unmarshall message
        final ProtocolMessageUnmarshaller<ProtocolMessage> unmarshaller = protocolContext.createUnmarshaller();
        final ByteCountingInputStream countingIn = new ByteCountingInputStream(socket.getInputStream());
        InputStream wrappedInStream = countingIn;
        if (logger.isDebugEnabled()) {
            // don't buffer more than 1 MB of the message
            final int maxMsgBuffer = 1024 * 1024;
            final CopyingInputStream copyingInputStream = new CopyingInputStream(wrappedInStream, maxMsgBuffer);
            wrappedInStream = copyingInputStream;
        }

        final ProtocolMessage request;
        try {
            request = unmarshaller.unmarshal(wrappedInStream);
        } finally {
            if (logger.isDebugEnabled() && wrappedInStream instanceof CopyingInputStream) {
                final CopyingInputStream copyingInputStream = (CopyingInputStream) wrappedInStream;
                byte[] receivedMessage = copyingInputStream.getBytesRead();
                logger.debug("Received message: " + new String(receivedMessage));
            }
        }

        request.setRequestorDN(requestorDn);

        // dispatch message to handler
        ProtocolHandler desiredHandler = null;
        final Collection<ProtocolHandler> handlers = getHandlers();
        for (final ProtocolHandler handler : handlers) {
            if (handler.canHandle(request)) {
                desiredHandler = handler;
                break;
            }
        }

        // if no handler found, throw exception; otherwise handle request
        if (desiredHandler == null) {
            logger.error("Received request of type {} but none of the following Protocol Handlers were able to process the request: {}", request.getType(), handlers);
            throw new ProtocolException("No handler assigned to handle message type: " + request.getType());
        } else {
            final ProtocolMessage response = desiredHandler.handle(request);
            if (response != null) {
                try {
                    logger.debug("Sending response for request {}", requestId);

                    // marshal message to output stream
                    final ProtocolMessageMarshaller<ProtocolMessage> marshaller = protocolContext.createMarshaller();
                    marshaller.marshal(response, socket.getOutputStream());
                } catch (final IOException ioe) {
                    throw new ProtocolException("Failed marshalling protocol message in response to message type: " + request.getType() + " due to " + ioe, ioe);
                }
            }
        }

        stopWatch.stop();
        final NodeIdentifier nodeId = getNodeIdentifier(request);
        final String from = nodeId == null ? hostname : nodeId.toString();
        logger.info("Finished processing request {} (type={}, length={} bytes) from {} in {}", requestId, request.getType(), countingIn.getBytesRead(), from, stopWatch.getDuration());
    } catch (final IOException | ProtocolException e) {
        logger.warn("Failed processing protocol message from " + hostname + " due to " + e, e);
        if (bulletinRepository != null) {
            final Bulletin bulletin = BulletinFactory.createBulletin("Clustering", "WARNING", String.format("Failed to process protocol message from %s due to: %s", hostname, e.toString()));
            bulletinRepository.addBulletin(bulletin);
        }
    }
}
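Stripped of the cluster-protocol machinery, the counting pattern here amounts to wrapping the socket stream before unmarshalling so that the listener can log the request length afterwards. A minimal sketch, with a hypothetical unmarshal() stand-in and in-memory data in place of a real socket:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import org.apache.nifi.stream.io.ByteCountingInputStream;

public class RequestSizeExample {
    // Hypothetical stand-in for a protocol unmarshaller: simply drains the stream it is given.
    static String unmarshal(final InputStream in) throws IOException {
        final StringBuilder sb = new StringBuilder();
        int b;
        while ((b = in.read()) != -1) {
            sb.append((char) b);
        }
        return sb.toString();
    }

    public static void main(String[] args) throws IOException {
        final InputStream socketIn = new ByteArrayInputStream("HEARTBEAT".getBytes(StandardCharsets.UTF_8));

        // Wrapping the stream before unmarshalling lets the caller report the request length
        // without the unmarshaller having to cooperate.
        final ByteCountingInputStream countingIn = new ByteCountingInputStream(socketIn);
        final String request = unmarshal(countingIn);
        System.out.println("Processed request " + request + " (" + countingIn.getBytesRead() + " bytes)");
    }
}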