
Example 1 with ByteCountingInputStream

Use of org.apache.nifi.stream.io.ByteCountingInputStream in the Apache NiFi project.

From the class CompressableRecordReader, the method resetStreamForNextBlock:

private void resetStreamForNextBlock() throws IOException {
    final InputStream limitedStream;
    if (tocReader == null) {
        // No Table of Contents available: read the remainder of the raw stream.
        limitedStream = rawInputStream;
    } else {
        final long offset = tocReader.getBlockOffset(1 + getBlockIndex());
        if (offset < 0) {
            limitedStream = rawInputStream;
        } else {
            // Bound the stream so reads cannot run past the start of the next block.
            limitedStream = new LimitingInputStream(rawInputStream, offset - rawInputStream.getBytesConsumed());
        }
    }
    final InputStream readableStream;
    if (compressed) {
        readableStream = new BufferedInputStream(new GZIPInputStream(limitedStream));
    } else {
        readableStream = new BufferedInputStream(limitedStream);
    }
    // Seed the counter with the raw stream's position so byte counts are offsets into the file,
    // not offsets into the decompressed block.
    byteCountingIn = new ByteCountingInputStream(readableStream, rawInputStream.getBytesConsumed());
    dis = new DataInputStream(byteCountingIn);
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) BufferedInputStream(java.io.BufferedInputStream) DataInputStream(java.io.DataInputStream) LimitingInputStream(org.apache.nifi.stream.io.LimitingInputStream) ByteCountingInputStream(org.apache.nifi.stream.io.ByteCountingInputStream) InputStream(java.io.InputStream)
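
The second constructor argument above seeds the counter with the bytes already consumed from the raw stream, so getBytesConsumed() reports offsets into the whole file rather than offsets within the current (possibly decompressed) block. Below is a minimal sketch of that pattern, assuming the nifi-utils artifact is on the classpath; the class name, the 16-byte header, and the payload sizes are made up for illustration.

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;

import org.apache.nifi.stream.io.ByteCountingInputStream;

// Hypothetical demo class; not part of NiFi.
public class OffsetAlignedCounting {
    public static void main(String[] args) throws IOException {
        final byte[] data = new byte[16 + 1024];
        try (ByteCountingInputStream raw = new ByteCountingInputStream(new ByteArrayInputStream(data))) {
            // Consume a (made-up) 16-byte header directly from the raw stream.
            raw.read(new byte[16]);

            // Seed the block-level counter with the raw position so that
            // getBytesConsumed() reflects the position in the whole stream,
            // not the position within the block.
            final ByteCountingInputStream blockIn = new ByteCountingInputStream(
                    new BufferedInputStream(raw), raw.getBytesConsumed());

            blockIn.read(new byte[100]);
            // Prints 116: the 16 header bytes plus the 100 bytes just read.
            System.out.println("stream offset: " + blockIn.getBytesConsumed());
        }
    }
}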

Example 2 with ByteCountingInputStream

Use of org.apache.nifi.stream.io.ByteCountingInputStream in the Apache NiFi project.

From the class PutSplunk, the method processDelimitedMessages:

/**
 * Read delimited messages from the FlowFile, tracking which messages are sent successfully.
 */
private void processDelimitedMessages(final ProcessContext context, final ProcessSession session, final FlowFile flowFile, final ChannelSender sender, final String delimiter) {
    final String protocol = context.getProperty(PROTOCOL).getValue();
    final byte[] delimiterBytes = delimiter.getBytes(StandardCharsets.UTF_8);
    // The NonThreadSafeCircularBuffer allows us to add a byte from the stream one at a time and see if it matches
    // some pattern. We can use this to search for the delimiter as we read through the stream of bytes in the FlowFile
    final NonThreadSafeCircularBuffer buffer = new NonThreadSafeCircularBuffer(delimiterBytes);
    final AtomicLong messagesSent = new AtomicLong(0L);
    final FlowFileMessageBatch messageBatch = new FlowFileMessageBatch(session, flowFile);
    activeBatches.add(messageBatch);
    try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(final InputStream rawIn) throws IOException {
                // contents of a single message
                byte[] data = null;
                boolean streamFinished = false;
                int nextByte;
                try (final InputStream bufferedIn = new BufferedInputStream(rawIn);
                    final ByteCountingInputStream in = new ByteCountingInputStream(bufferedIn)) {
                    long messageStartOffset = in.getBytesConsumed();
                    // read until we're out of data.
                    while (!streamFinished) {
                        nextByte = in.read();
                        if (nextByte > -1) {
                            baos.write(nextByte);
                        }
                        if (nextByte == -1) {
                            // we ran out of data. This message is complete.
                            data = getMessage(baos, baos.size(), protocol);
                            streamFinished = true;
                        } else if (buffer.addAndCompare((byte) nextByte)) {
                            // we matched our delimiter. This message is complete. We want all of the bytes from the
                            // underlying BAOS except for the last 'delimiterBytes.length' bytes because we don't want
                            // the delimiter itself to be sent.
                            data = getMessage(baos, baos.size() - delimiterBytes.length, protocol);
                        }
                        if (data != null) {
                            final long messageEndOffset = in.getBytesConsumed();
                            // If the message has no data, ignore it.
                            if (data.length != 0) {
                                final long rangeStart = messageStartOffset;
                                try {
                                    sender.send(data);
                                    messageBatch.addSuccessfulRange(rangeStart, messageEndOffset);
                                    messagesSent.incrementAndGet();
                                } catch (final IOException e) {
                                    messageBatch.addFailedRange(rangeStart, messageEndOffset, e);
                                }
                            }
                            // reset BAOS so that we can start a new message.
                            baos.reset();
                            data = null;
                            messageStartOffset = in.getBytesConsumed();
                        }
                    }
                }
            }
        });
        messageBatch.setNumMessages(messagesSent.get());
    } catch (final IOException ioe) {
    // Since this can be thrown only from closing the ByteArrayOutputStream(), we have already
    // completed everything that we need to do, so there's nothing really to be done here
    }
}
Also used : AtomicLong(java.util.concurrent.atomic.AtomicLong) NonThreadSafeCircularBuffer(org.apache.nifi.stream.io.util.NonThreadSafeCircularBuffer) BufferedInputStream(java.io.BufferedInputStream) ByteCountingInputStream(org.apache.nifi.stream.io.ByteCountingInputStream) InputStream(java.io.InputStream) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException)
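
The delimiter scan above relies on NonThreadSafeCircularBuffer.addAndCompare() to detect the delimiter one byte at a time, while ByteCountingInputStream.getBytesConsumed() supplies the start and end offsets of each message for the success and failure ranges. The standalone sketch below condenses that idea, assuming the nifi-utils artifact is on the classpath; the class name, the newline delimiter, and the sample input are made up for illustration.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.nifi.stream.io.ByteCountingInputStream;
import org.apache.nifi.stream.io.util.NonThreadSafeCircularBuffer;

// Hypothetical demo class; not part of NiFi.
public class DelimiterScan {
    public static void main(String[] args) throws IOException {
        final byte[] delimiter = "\n".getBytes(StandardCharsets.UTF_8);
        final NonThreadSafeCircularBuffer buffer = new NonThreadSafeCircularBuffer(delimiter);
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();

        final byte[] input = "alpha\nbeta\n".getBytes(StandardCharsets.UTF_8);
        try (ByteCountingInputStream in = new ByteCountingInputStream(new ByteArrayInputStream(input))) {
            long messageStart = in.getBytesConsumed();
            int nextByte;
            while ((nextByte = in.read()) != -1) {
                baos.write(nextByte);
                if (buffer.addAndCompare((byte) nextByte)) {
                    // Delimiter matched: the message is everything buffered so far,
                    // minus the delimiter bytes themselves.
                    final int length = baos.size() - delimiter.length;
                    System.out.printf("message [%d, %d): %s%n", messageStart, in.getBytesConsumed(),
                            new String(baos.toByteArray(), 0, length, StandardCharsets.UTF_8));
                    baos.reset();
                    messageStart = in.getBytesConsumed();
                }
            }
        }
    }
}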

Example 3 with ByteCountingInputStream

Use of org.apache.nifi.stream.io.ByteCountingInputStream in the Apache NiFi project.

From the class HttpInput, the method setInputStream:

public void setInputStream(InputStream inputStream) {
    interruptableIn = new InterruptableInputStream(inputStream);
    this.countingIn = new ByteCountingInputStream(interruptableIn);
}
Also used : InterruptableInputStream(org.apache.nifi.remote.io.InterruptableInputStream) ByteCountingInputStream(org.apache.nifi.stream.io.ByteCountingInputStream)
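
HttpInput keeps a reference to the counting stream, so the number of bytes read can be reported after the request body has been consumed. A minimal standalone sketch of that wrap-then-report pattern follows, assuming the NiFi artifacts that provide these classes are on the classpath; the class name and the 1 KB stand-in payload are made up for illustration.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.nifi.remote.io.InterruptableInputStream;
import org.apache.nifi.stream.io.ByteCountingInputStream;

// Hypothetical demo class; not part of NiFi.
public class CountedBody {
    public static void main(String[] args) throws IOException {
        final InputStream body = new ByteArrayInputStream(new byte[1024]);
        final ByteCountingInputStream countingIn =
                new ByteCountingInputStream(new InterruptableInputStream(body));

        // Drain the body as a request handler would, then report how much was read.
        final byte[] scratch = new byte[256];
        while (countingIn.read(scratch) != -1) {
            // keep reading
        }
        System.out.println("bytes read: " + countingIn.getBytesRead());
    }
}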

Example 4 with ByteCountingInputStream

Use of org.apache.nifi.stream.io.ByteCountingInputStream in the Apache NiFi project.

From the class LengthDelimitedJournal, the method recoverRecords:

@Override
public JournalRecovery recoverRecords(final Map<Object, T> recordMap, final Set<String> swapLocations) throws IOException {
    long maxTransactionId = -1L;
    int updateCount = 0;
    boolean eofException = false;
    logger.info("Recovering records from journal {}", journalFile);
    final double journalLength = journalFile.length();
    try (final InputStream fis = new FileInputStream(journalFile);
        final InputStream bufferedIn = new BufferedInputStream(fis);
        final ByteCountingInputStream byteCountingIn = new ByteCountingInputStream(bufferedIn);
        final DataInputStream in = new DataInputStream(byteCountingIn)) {
        try {
            // Validate that the header is what we expect and obtain the appropriate SerDe and Version information
            final SerDeAndVersion serdeAndVersion = validateHeader(in);
            final SerDe<T> serde = serdeAndVersion.getSerDe();
            // Ensure that we get a valid transaction indicator
            int transactionIndicator = in.read();
            if (transactionIndicator != TRANSACTION_FOLLOWS && transactionIndicator != JOURNAL_COMPLETE && transactionIndicator != -1) {
                throw new IOException("After reading " + byteCountingIn.getBytesConsumed() + " bytes from " + journalFile + ", encountered unexpected value of " + transactionIndicator + " for the Transaction Indicator. This journal may have been corrupted.");
            }
            long consumedAtLog = 0L;
            // We don't want to apply the updates in a transaction until we've finished recovering the entire
            // transaction. Otherwise, we could apply say 8 out of 10 updates and then hit an EOF. In such a case,
            // we want to rollback the entire transaction. We handle this by not updating recordMap or swapLocations
            // variables directly but instead keeping track of the things that occurred and then once we've read the
            // entire transaction, we can apply those updates to the recordMap and swapLocations.
            final Map<Object, T> transactionRecordMap = new HashMap<>();
            final Set<Object> idsRemoved = new HashSet<>();
            final Set<String> swapLocationsRemoved = new HashSet<>();
            final Set<String> swapLocationsAdded = new HashSet<>();
            int transactionUpdates = 0;
            // While we have a transaction to recover, recover it
            while (transactionIndicator == TRANSACTION_FOLLOWS) {
                transactionRecordMap.clear();
                idsRemoved.clear();
                swapLocationsRemoved.clear();
                swapLocationsAdded.clear();
                transactionUpdates = 0;
                // Format is <Transaction ID: 8 bytes> <Transaction Length: 4 bytes> <Transaction data: # of bytes indicated by Transaction Length Field>
                final long transactionId = in.readLong();
                maxTransactionId = Math.max(maxTransactionId, transactionId);
                final int transactionLength = in.readInt();
                // Use SerDe to deserialize the update. We use a LimitingInputStream to ensure that the SerDe is not able to read past its intended
                // length, in case there is a bug in the SerDe. We then use a ByteCountingInputStream so that we can ensure that all of the data has
                // been read and throw EOFException otherwise.
                final InputStream transactionLimitingIn = new LimitingInputStream(in, transactionLength);
                final ByteCountingInputStream transactionByteCountingIn = new ByteCountingInputStream(transactionLimitingIn);
                final DataInputStream transactionDis = new DataInputStream(transactionByteCountingIn);
                while (transactionByteCountingIn.getBytesConsumed() < transactionLength) {
                    final T record = serde.deserializeEdit(transactionDis, recordMap, serdeAndVersion.getVersion());
                    // Update our RecordMap so that we have the most up-to-date version of the Record.
                    final Object recordId = serde.getRecordIdentifier(record);
                    final UpdateType updateType = serde.getUpdateType(record);
                    switch(updateType) {
                        case DELETE:
                            {
                                idsRemoved.add(recordId);
                                transactionRecordMap.remove(recordId);
                                break;
                            }
                        case SWAP_IN:
                            {
                                final String location = serde.getLocation(record);
                                if (location == null) {
                                    logger.error("Recovered SWAP_IN record from edit log, but it did not contain a Location; skipping record");
                                } else {
                                    swapLocationsRemoved.add(location);
                                    swapLocationsAdded.remove(location);
                                    transactionRecordMap.put(recordId, record);
                                }
                                break;
                            }
                        case SWAP_OUT:
                            {
                                final String location = serde.getLocation(record);
                                if (location == null) {
                                    logger.error("Recovered SWAP_OUT record from edit log, but it did not contain a Location; skipping record");
                                } else {
                                    swapLocationsRemoved.remove(location);
                                    swapLocationsAdded.add(location);
                                    idsRemoved.add(recordId);
                                    transactionRecordMap.remove(recordId);
                                }
                                break;
                            }
                        default:
                            {
                                transactionRecordMap.put(recordId, record);
                                idsRemoved.remove(recordId);
                                break;
                            }
                    }
                    transactionUpdates++;
                }
                // Apply the transaction
                for (final Object id : idsRemoved) {
                    recordMap.remove(id);
                }
                recordMap.putAll(transactionRecordMap);
                swapLocations.removeAll(swapLocationsRemoved);
                swapLocations.addAll(swapLocationsAdded);
                updateCount += transactionUpdates;
                // Check if there is another transaction to read
                transactionIndicator = in.read();
                if (transactionIndicator != TRANSACTION_FOLLOWS && transactionIndicator != JOURNAL_COMPLETE && transactionIndicator != -1) {
                    throw new IOException("After reading " + byteCountingIn.getBytesConsumed() + " bytes from " + journalFile + ", encountered unexpected value of " + transactionIndicator + " for the Transaction Indicator. This journal may have been corrupted.");
                }
                // If we have a very large journal (for instance, if checkpoint is not called for a long time, or if there is a problem rolling over
                // the journal), then we want to occasionally notify the user that we are, in fact, making progress, so that it doesn't appear that
                // NiFi has become "stuck".
                final long consumed = byteCountingIn.getBytesConsumed();
                if (consumed - consumedAtLog > 50_000_000) {
                    final double percentage = consumed / journalLength * 100D;
                    final String pct = new DecimalFormat("#.00").format(percentage);
                    logger.info("{}% of the way finished recovering journal {}, having recovered {} updates", pct, journalFile, updateCount);
                    consumedAtLog = consumed;
                }
            }
        } catch (final EOFException eof) {
            eofException = true;
            logger.warn("Encountered unexpected End-of-File when reading journal file {}; assuming that NiFi was shutdown unexpectedly and continuing recovery", journalFile);
        } catch (final Exception e) {
            // If everything that remains in the journal is NUL bytes, assume a sudden power loss truncated the
            // file and skip the rest of the journal; otherwise, there is not much that we can do but to re-throw the Exception.
            if (remainingBytesAllNul(in)) {
                logger.warn("Failed to recover some of the data from Write-Ahead Log Journal because encountered trailing NUL bytes. " + "This will sometimes happen after a sudden power loss. The rest of this journal file will be skipped for recovery purposes." + "The following Exception was encountered while recovering the updates to the journal:", e);
            } else {
                throw e;
            }
        }
    }
    logger.info("Successfully recovered {} updates from journal {}", updateCount, journalFile);
    return new StandardJournalRecovery(updateCount, maxTransactionId, eofException);
}
Also used : HashMap(java.util.HashMap) DecimalFormat(java.text.DecimalFormat) ByteCountingInputStream(org.apache.nifi.stream.io.ByteCountingInputStream) BufferedInputStream(java.io.BufferedInputStream) EOFException(java.io.EOFException) HashSet(java.util.HashSet) DataInputStream(java.io.DataInputStream) LimitingInputStream(org.apache.nifi.stream.io.LimitingInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) IOException(java.io.IOException) UpdateType(org.wali.UpdateType) FileNotFoundException(java.io.FileNotFoundException)
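
The nested LimitingInputStream/ByteCountingInputStream pairing in the loop above is a general pattern for reading a length-prefixed payload: the limiter keeps a buggy deserializer from reading past the declared transaction length, and the counter shows how far the read actually got, which makes truncation detectable. Below is a minimal sketch of that pattern, assuming the nifi-utils artifact is on the classpath; the class name, the readRecord helper, and the record layout are made up for illustration.

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.nifi.stream.io.ByteCountingInputStream;
import org.apache.nifi.stream.io.LimitingInputStream;

// Hypothetical demo class; not part of NiFi.
public class BoundedRecordRead {

    // Reads exactly 'length' bytes of record data from 'in'. The LimitingInputStream
    // prevents reading past the declared length; the ByteCountingInputStream tells us
    // how many bytes were actually consumed if the record turns out to be truncated.
    static byte[] readRecord(final DataInputStream in, final int length) throws IOException {
        final ByteCountingInputStream counting =
                new ByteCountingInputStream(new LimitingInputStream(in, length));
        final DataInputStream recordIn = new DataInputStream(counting);

        final byte[] payload = new byte[length];
        try {
            recordIn.readFully(payload);
        } catch (final EOFException eof) {
            throw new EOFException("Record truncated after " + counting.getBytesConsumed()
                    + " of " + length + " bytes");
        }
        return payload;
    }

    public static void main(String[] args) throws IOException {
        // A made-up journal fragment: a 4-byte length prefix followed by the payload.
        final byte[] journal = {0, 0, 0, 3, 'a', 'b', 'c'};
        try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(journal))) {
            final int length = in.readInt();
            System.out.println(new String(readRecord(in, length), StandardCharsets.US_ASCII));
        }
    }
}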

Example 5 with ByteCountingInputStream

Use of org.apache.nifi.stream.io.ByteCountingInputStream in the Apache NiFi project.

From the class SocketProtocolListener, the method dispatchRequest:

@Override
public void dispatchRequest(final Socket socket) {
    String hostname = null;
    try {
        final StopWatch stopWatch = new StopWatch(true);
        hostname = socket.getInetAddress().getHostName();
        final String requestId = UUID.randomUUID().toString();
        logger.debug("Received request {} from {}", requestId, hostname);
        String requestorDn = getRequestorDN(socket);
        // unmarshall message
        final ProtocolMessageUnmarshaller<ProtocolMessage> unmarshaller = protocolContext.createUnmarshaller();
        final ByteCountingInputStream countingIn = new ByteCountingInputStream(socket.getInputStream());
        InputStream wrappedInStream = countingIn;
        if (logger.isDebugEnabled()) {
            // don't buffer more than 1 MB of the message
            final int maxMsgBuffer = 1024 * 1024;
            final CopyingInputStream copyingInputStream = new CopyingInputStream(wrappedInStream, maxMsgBuffer);
            wrappedInStream = copyingInputStream;
        }
        final ProtocolMessage request;
        try {
            request = unmarshaller.unmarshal(wrappedInStream);
        } finally {
            if (logger.isDebugEnabled() && wrappedInStream instanceof CopyingInputStream) {
                final CopyingInputStream copyingInputStream = (CopyingInputStream) wrappedInStream;
                byte[] receivedMessage = copyingInputStream.getBytesRead();
                logger.debug("Received message: " + new String(receivedMessage));
            }
        }
        request.setRequestorDN(requestorDn);
        // dispatch message to handler
        ProtocolHandler desiredHandler = null;
        final Collection<ProtocolHandler> handlers = getHandlers();
        for (final ProtocolHandler handler : handlers) {
            if (handler.canHandle(request)) {
                desiredHandler = handler;
                break;
            }
        }
        // if no handler found, throw exception; otherwise handle request
        if (desiredHandler == null) {
            logger.error("Received request of type {} but none of the following Protocol Handlers were able to process the request: {}", request.getType(), handlers);
            throw new ProtocolException("No handler assigned to handle message type: " + request.getType());
        } else {
            final ProtocolMessage response = desiredHandler.handle(request);
            if (response != null) {
                try {
                    logger.debug("Sending response for request {}", requestId);
                    // marshal message to output stream
                    final ProtocolMessageMarshaller<ProtocolMessage> marshaller = protocolContext.createMarshaller();
                    marshaller.marshal(response, socket.getOutputStream());
                } catch (final IOException ioe) {
                    throw new ProtocolException("Failed marshalling protocol message in response to message type: " + request.getType() + " due to " + ioe, ioe);
                }
            }
        }
        stopWatch.stop();
        final NodeIdentifier nodeId = getNodeIdentifier(request);
        final String from = nodeId == null ? hostname : nodeId.toString();
        logger.info("Finished processing request {} (type={}, length={} bytes) from {} in {}", requestId, request.getType(), countingIn.getBytesRead(), from, stopWatch.getDuration());
    } catch (final IOException | ProtocolException e) {
        logger.warn("Failed processing protocol message from " + hostname + " due to " + e, e);
        if (bulletinRepository != null) {
            final Bulletin bulletin = BulletinFactory.createBulletin("Clustering", "WARNING", String.format("Failed to process protocol message from %s due to: %s", hostname, e.toString()));
            bulletinRepository.addBulletin(bulletin);
        }
    }
}
Also used : ProtocolException(org.apache.nifi.cluster.protocol.ProtocolException) ByteCountingInputStream(org.apache.nifi.stream.io.ByteCountingInputStream) InputStream(java.io.InputStream) IOException(java.io.IOException) ProtocolMessage(org.apache.nifi.cluster.protocol.message.ProtocolMessage) StopWatch(org.apache.nifi.util.StopWatch) ProtocolHandler(org.apache.nifi.cluster.protocol.ProtocolHandler) Bulletin(org.apache.nifi.reporting.Bulletin) NodeIdentifier(org.apache.nifi.cluster.protocol.NodeIdentifier)
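
The listener uses the counting stream purely for instrumentation: the request is unmarshalled from the wrapped stream, and the byte count is logged alongside the StopWatch duration. A small standalone sketch of that instrumentation pattern follows, assuming the nifi-utils artifact is on the classpath; the class name and the 2 KB stand-in request body are made up for illustration.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.concurrent.TimeUnit;

import org.apache.nifi.stream.io.ByteCountingInputStream;
import org.apache.nifi.util.StopWatch;

// Hypothetical demo class; not part of NiFi.
public class RequestInstrumentation {
    public static void main(String[] args) throws IOException {
        final StopWatch stopWatch = new StopWatch(true);
        try (ByteCountingInputStream countingIn =
                new ByteCountingInputStream(new ByteArrayInputStream(new byte[2048]))) {
            // Stand-in for unmarshalling the request: drain the stream.
            final byte[] scratch = new byte[512];
            while (countingIn.read(scratch) != -1) {
                // keep reading
            }
            stopWatch.stop();
            System.out.println("Processed request (" + countingIn.getBytesRead() + " bytes) in "
                    + stopWatch.getDuration(TimeUnit.MILLISECONDS) + " ms");
        }
    }
}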

Aggregations

ByteCountingInputStream (org.apache.nifi.stream.io.ByteCountingInputStream): 10
InputStream (java.io.InputStream): 9
IOException (java.io.IOException): 8
ByteArrayInputStream (java.io.ByteArrayInputStream): 5
DisableOnCloseInputStream (org.apache.nifi.controller.repository.io.DisableOnCloseInputStream): 5
FlowFileAccessInputStream (org.apache.nifi.controller.repository.io.FlowFileAccessInputStream): 5
LimitedInputStream (org.apache.nifi.controller.repository.io.LimitedInputStream): 5
TaskTerminationInputStream (org.apache.nifi.controller.repository.io.TaskTerminationInputStream): 5
FlowFileAccessException (org.apache.nifi.processor.exception.FlowFileAccessException): 5
ProcessException (org.apache.nifi.processor.exception.ProcessException): 4
BufferedInputStream (java.io.BufferedInputStream): 3
DataInputStream (java.io.DataInputStream): 2
EOFException (java.io.EOFException): 2
LimitingInputStream (org.apache.nifi.stream.io.LimitingInputStream): 2
BufferedOutputStream (java.io.BufferedOutputStream): 1
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1
FileInputStream (java.io.FileInputStream): 1
FileNotFoundException (java.io.FileNotFoundException): 1
OutputStream (java.io.OutputStream): 1
DecimalFormat (java.text.DecimalFormat): 1