Example 31 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

From the class PutIgniteCache, method onTrigger.

/**
 * Handles a batch of flow files: reads each file's content into a byte array,
 * streams the entries into the Ignite cache, and routes the files to success
 * or failure.
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
    final List<FlowFile> flowFiles = session.get(batchSize);
    if (flowFiles.isEmpty()) {
        return;
    }
    List<Map.Entry<String, byte[]>> cacheItems = new ArrayList<>();
    List<FlowFile> successfulFlowFiles = new ArrayList<>();
    List<FlowFile> failedFlowFiles = new ArrayList<>();
    try {
        for (int i = 0; i < flowFiles.size(); i++) {
            FlowFile flowFile = null;
            try {
                flowFile = flowFiles.get(i);
                String key = context.getProperty(IGNITE_CACHE_ENTRY_KEY).evaluateAttributeExpressions(flowFile).getValue();
                if (isFailedFlowFile(flowFile, key)) {
                    failedFlowFiles.add(flowFile);
                    continue;
                }
                final byte[] byteArray = new byte[(int) flowFile.getSize()];
                session.read(flowFile, new InputStreamCallback() {

                    @Override
                    public void process(final InputStream in) throws IOException {
                        StreamUtils.fillBuffer(in, byteArray, true);
                    }
                });
                cacheItems.add(new AbstractMap.SimpleEntry<String, byte[]>(key, byteArray));
                successfulFlowFiles.add(flowFile);
            } catch (Exception e) {
                getLogger().error("Failed to insert {} into IgniteDB due to {}", new Object[] { flowFile, e }, e);
                session.transfer(flowFile, REL_FAILURE);
                context.yield();
            }
        }
    } finally {
        if (!cacheItems.isEmpty()) {
            IgniteFuture<?> futures = igniteDataStreamer.addData(cacheItems);
            Object result = futures.get();
            getLogger().debug("Result {} of addData", new Object[] { result });
        }
        if (!successfulFlowFiles.isEmpty()) {
            successfulFlowFiles = updateSuccessfulFlowFileAttributes(flowFiles, successfulFlowFiles, session);
            session.transfer(successfulFlowFiles, REL_SUCCESS);
            for (FlowFile flowFile : successfulFlowFiles) {
                String key = context.getProperty(IGNITE_CACHE_ENTRY_KEY).evaluateAttributeExpressions(flowFile).getValue();
                session.getProvenanceReporter().send(flowFile, "ignite://cache/" + getIgniteCache().getName() + "/" + key);
            }
        }
        if (!failedFlowFiles.isEmpty()) {
            failedFlowFiles = updateFailedFlowFileAttributes(flowFiles, failedFlowFiles, session, context);
            session.transfer(failedFlowFiles, REL_FAILURE);
        }
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), InputStream (java.io.InputStream), ArrayList (java.util.ArrayList), IOException (java.io.IOException), ProcessException (org.apache.nifi.processor.exception.ProcessException), AbstractMap (java.util.AbstractMap), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback)
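
InputStreamCallback declares a single process method, so on Java 8+ the anonymous class above collapses to a lambda. A minimal sketch of the same buffer-filling read, assuming session and flowFile are in scope as in onTrigger:

// Read the FlowFile's entire content into a fixed-size buffer.
// The (int) cast assumes the content is smaller than 2 GB.
final byte[] byteArray = new byte[(int) flowFile.getSize()];
session.read(flowFile, in -> StreamUtils.fillBuffer(in, byteArray, true));

The third argument to StreamUtils.fillBuffer makes it fail if the stream ends before the buffer is full, which guards against a mismatch between the reported size and the actual content.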

Example 32 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

From the class PutHBaseCell, method createPut.

@Override
protected PutFlowFile createPut(final ProcessSession session, final ProcessContext context, final FlowFile flowFile) {
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String row = context.getProperty(ROW_ID).evaluateAttributeExpressions(flowFile).getValue();
    final String columnFamily = context.getProperty(COLUMN_FAMILY).evaluateAttributeExpressions(flowFile).getValue();
    final String columnQualifier = context.getProperty(COLUMN_QUALIFIER).evaluateAttributeExpressions(flowFile).getValue();
    final String timestampValue = context.getProperty(TIMESTAMP).evaluateAttributeExpressions(flowFile).getValue();
    final Long timestamp;
    if (!StringUtils.isBlank(timestampValue)) {
        try {
            timestamp = Long.valueOf(timestampValue);
        } catch (Exception e) {
            getLogger().error("Invalid timestamp value: " + timestampValue, e);
            return null;
        }
    } else {
        timestamp = null;
    }
    final byte[] buffer = new byte[(int) flowFile.getSize()];
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, buffer);
        }
    });
    final Collection<PutColumn> columns = Collections.singletonList(new PutColumn(columnFamily.getBytes(StandardCharsets.UTF_8), columnQualifier.getBytes(StandardCharsets.UTF_8), buffer, timestamp));
    byte[] rowKeyBytes = getRow(row, context.getProperty(ROW_ID_ENCODING_STRATEGY).getValue());
    return new PutFlowFile(tableName, rowKeyBytes, columns, flowFile);
}
Also used: InputStream (java.io.InputStream), PutColumn (org.apache.nifi.hbase.put.PutColumn), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), IOException (java.io.IOException), PutFlowFile (org.apache.nifi.hbase.put.PutFlowFile)
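
Sizing the buffer from flowFile.getSize() works only while the content fits in an int. As an alternative sketch that avoids the callback entirely, ProcessSession also offers exportTo, which copies the content to any OutputStream; session and flowFile are assumed to be in scope:

// Copy the FlowFile content into an in-memory buffer (java.io.ByteArrayOutputStream).
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
session.exportTo(flowFile, baos);
final byte[] buffer = baos.toByteArray();

Both variants still hold the full content in memory; for very large flow files a streaming read is the safer design.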

Example 33 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

From the class PublishKafkaRecord_0_10, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final List<FlowFile> flowFiles = session.get(FlowFileFilters.newSizeBasedFilter(1, DataUnit.MB, 500));
    if (flowFiles.isEmpty()) {
        return;
    }
    final PublisherPool pool = getPublisherPool(context);
    if (pool == null) {
        context.yield();
        return;
    }
    final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue();
    final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final long startTime = System.nanoTime();
    try (final PublisherLease lease = pool.obtainPublisher()) {
        // Send each FlowFile to Kafka asynchronously.
        for (final FlowFile flowFile : flowFiles) {
            if (!isScheduled()) {
                // If stopped, re-queue FlowFile instead of sending it
                session.transfer(flowFile);
                continue;
            }
            final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue();
            final String messageKeyField = context.getProperty(MESSAGE_KEY_FIELD).evaluateAttributeExpressions(flowFile).getValue();
            final Map<String, String> attributes = flowFile.getAttributes();
            try {
                session.read(flowFile, new InputStreamCallback() {

                    @Override
                    public void process(final InputStream rawIn) throws IOException {
                        try (final InputStream in = new BufferedInputStream(rawIn)) {
                            final RecordReader reader = readerFactory.createRecordReader(attributes, in, getLogger());
                            final RecordSet recordSet = reader.createRecordSet();
                            final RecordSchema schema = writerFactory.getSchema(attributes, recordSet.getSchema());
                            lease.publish(flowFile, recordSet, writerFactory, schema, messageKeyField, topic);
                        } catch (final SchemaNotFoundException | MalformedRecordException e) {
                            throw new ProcessException(e);
                        }
                    }
                });
            } catch (final Exception e) {
                // The FlowFile will be obtained and the error logged below, when calling publishResult.getFailedFlowFiles()
                lease.getTracker().fail(flowFile, e);
                continue;
            }
        }
        // Complete the send
        final PublishResult publishResult = lease.complete();
        // Transfer any successful FlowFiles.
        final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime);
        for (FlowFile success : publishResult.getSuccessfulFlowFiles()) {
            final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(success).getValue();
            final int msgCount = publishResult.getSuccessfulMessageCount(success);
            success = session.putAttribute(success, MSG_COUNT, String.valueOf(msgCount));
            session.adjustCounter("Messages Sent", msgCount, true);
            final String transitUri = KafkaProcessorUtils.buildTransitURI(securityProtocol, bootstrapServers, topic);
            session.getProvenanceReporter().send(success, transitUri, "Sent " + msgCount + " messages", transmissionMillis);
            session.transfer(success, REL_SUCCESS);
        }
        // Transfer any failures.
        for (final FlowFile failure : publishResult.getFailedFlowFiles()) {
            final int successCount = publishResult.getSuccessfulMessageCount(failure);
            if (successCount > 0) {
                getLogger().error("Failed to send some messages for {} to Kafka, but {} messages were acknowledged by Kafka. Routing to failure due to {}", new Object[] { failure, successCount, publishResult.getReasonForFailure(failure) });
            } else {
                getLogger().error("Failed to send all message for {} to Kafka; routing to failure due to {}", new Object[] { failure, publishResult.getReasonForFailure(failure) });
            }
            session.transfer(failure, REL_FAILURE);
        }
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), BufferedInputStream (java.io.BufferedInputStream), InputStream (java.io.InputStream), RecordReader (org.apache.nifi.serialization.RecordReader), IOException (java.io.IOException), SchemaNotFoundException (org.apache.nifi.schema.access.SchemaNotFoundException), ProcessException (org.apache.nifi.processor.exception.ProcessException), MalformedRecordException (org.apache.nifi.serialization.MalformedRecordException), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), RecordSet (org.apache.nifi.serialization.record.RecordSet), RecordSchema (org.apache.nifi.serialization.record.RecordSchema)
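
Note how the callback wraps SchemaNotFoundException and MalformedRecordException in a ProcessException: process may only declare IOException, so any other checked exception has to be tunneled out as an unchecked one. The same read written as a lambda, assuming the locals from onTrigger above (readerFactory, writerFactory, attributes, lease, messageKeyField, topic):

session.read(flowFile, rawIn -> {
    try (final InputStream in = new BufferedInputStream(rawIn)) {
        // Parse the content as records and hand the whole set to the publisher lease.
        final RecordReader reader = readerFactory.createRecordReader(attributes, in, getLogger());
        final RecordSet recordSet = reader.createRecordSet();
        final RecordSchema schema = writerFactory.getSchema(attributes, recordSet.getSchema());
        lease.publish(flowFile, recordSet, writerFactory, schema, messageKeyField, topic);
    } catch (final SchemaNotFoundException | MalformedRecordException e) {
        // Tunnel checked exceptions the callback signature cannot declare.
        throw new ProcessException(e);
    }
});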

Example 34 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

From the class PublishKafka_0_10, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final boolean useDemarcator = context.getProperty(MESSAGE_DEMARCATOR).isSet();
    final List<FlowFile> flowFiles = session.get(FlowFileFilters.newSizeBasedFilter(250, DataUnit.KB, 500));
    if (flowFiles.isEmpty()) {
        return;
    }
    final PublisherPool pool = getPublisherPool(context);
    if (pool == null) {
        context.yield();
        return;
    }
    final String securityProtocol = context.getProperty(KafkaProcessorUtils.SECURITY_PROTOCOL).getValue();
    final String bootstrapServers = context.getProperty(KafkaProcessorUtils.BOOTSTRAP_SERVERS).evaluateAttributeExpressions().getValue();
    final long startTime = System.nanoTime();
    try (final PublisherLease lease = pool.obtainPublisher()) {
        // Send each FlowFile to Kafka asynchronously.
        for (final FlowFile flowFile : flowFiles) {
            if (!isScheduled()) {
                // If stopped, re-queue FlowFile instead of sending it
                session.transfer(flowFile);
                continue;
            }
            final byte[] messageKey = getMessageKey(flowFile, context);
            final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(flowFile).getValue();
            final byte[] demarcatorBytes;
            if (useDemarcator) {
                demarcatorBytes = context.getProperty(MESSAGE_DEMARCATOR).evaluateAttributeExpressions(flowFile).getValue().getBytes(StandardCharsets.UTF_8);
            } else {
                demarcatorBytes = null;
            }
            session.read(flowFile, new InputStreamCallback() {

                @Override
                public void process(final InputStream rawIn) throws IOException {
                    try (final InputStream in = new BufferedInputStream(rawIn)) {
                        lease.publish(flowFile, in, messageKey, demarcatorBytes, topic);
                    }
                }
            });
        }
        // Complete the send
        final PublishResult publishResult = lease.complete();
        // Transfer any successful FlowFiles.
        final long transmissionMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime);
        for (FlowFile success : publishResult.getSuccessfulFlowFiles()) {
            final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions(success).getValue();
            final int msgCount = publishResult.getSuccessfulMessageCount(success);
            success = session.putAttribute(success, MSG_COUNT, String.valueOf(msgCount));
            session.adjustCounter("Messages Sent", msgCount, true);
            final String transitUri = KafkaProcessorUtils.buildTransitURI(securityProtocol, bootstrapServers, topic);
            session.getProvenanceReporter().send(success, transitUri, "Sent " + msgCount + " messages", transmissionMillis);
            session.transfer(success, REL_SUCCESS);
        }
        // Transfer any failures.
        for (final FlowFile failure : publishResult.getFailedFlowFiles()) {
            final int successCount = publishResult.getSuccessfulMessageCount(failure);
            if (successCount > 0) {
                getLogger().error("Failed to send some messages for {} to Kafka, but {} messages were acknowledged by Kafka. Routing to failure due to {}", new Object[] { failure, successCount, publishResult.getReasonForFailure(failure) });
            } else {
                getLogger().error("Failed to send all message for {} to Kafka; routing to failure due to {}", new Object[] { failure, publishResult.getReasonForFailure(failure) });
            }
            session.transfer(failure, REL_FAILURE);
        }
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), BufferedInputStream (java.io.BufferedInputStream), InputStream (java.io.InputStream), IOException (java.io.IOException), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback)
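
The callback here only streams bytes into the lease; nothing is returned. When a result is needed, the usual trick is a mutable holder captured by the callback. A hypothetical sketch that counts newline-demarcated messages without buffering the whole FlowFile (messageCount is an illustrative name; AtomicLong comes from java.util.concurrent.atomic):

final AtomicLong messageCount = new AtomicLong(0L);
session.read(flowFile, rawIn -> {
    try (final InputStream in = new BufferedInputStream(rawIn)) {
        int b;
        while ((b = in.read()) != -1) {
            if (b == '\n') {
                // Count one message per newline demarcator.
                messageCount.incrementAndGet();
            }
        }
    }
});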

Example 35 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

From the class PutKafka, method doRendezvousWithKafka.

/**
 * Will rendezvous with {@link KafkaPublisher} after building a
 * {@link PublishingContext} and will produce the resulting {@link FlowFile}.
 * The resulting FlowFile contains all information required to determine
 * whether the message publishing that originated from the provided FlowFile
 * succeeded fully, succeeded partially, or failed completely (see
 * {@link #isFailedFlowFile(FlowFile)}).
 */
private FlowFile doRendezvousWithKafka(final FlowFile flowFile, final ProcessContext context, final ProcessSession session) {
    final AtomicReference<KafkaPublisherResult> publishResultRef = new AtomicReference<>();
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(InputStream contentStream) throws IOException {
            PublishingContext publishingContext = PutKafka.this.buildPublishingContext(flowFile, context, contentStream);
            KafkaPublisherResult result = null;
            try {
                result = PutKafka.this.kafkaResource.publish(publishingContext);
            } catch (final IllegalArgumentException e) {
                getLogger().error("Failed to publish {}, due to {}", new Object[] { flowFile, e }, e);
                result = new KafkaPublisherResult(0, -1);
            }
            publishResultRef.set(result);
        }
    });
    final FlowFile resultFile;
    if (publishResultRef.get().isAllAcked()) {
        resultFile = this.cleanUpFlowFileIfNecessary(flowFile, session);
    } else {
        resultFile = session.putAllAttributes(flowFile,
                this.buildFailedFlowFileAttributes(publishResultRef.get().getLastMessageAcked(), flowFile, context));
    }
    return resultFile;
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), InputStream (java.io.InputStream), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), AtomicReference (java.util.concurrent.atomic.AtomicReference), IOException (java.io.IOException), KafkaPublisherResult (org.apache.nifi.processors.kafka.KafkaPublisher.KafkaPublisherResult)
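
The AtomicReference holder above is the standard way to get a value out of an InputStreamCallback: process returns void, and any local it captures must be effectively final. A minimal sketch of the pattern, reading a hypothetical firstLine result (BufferedReader, InputStreamReader, and StandardCharsets are from the JDK):

final AtomicReference<String> firstLine = new AtomicReference<>();
session.read(flowFile, in -> {
    try (final BufferedReader reader =
             new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
        // Store the result in the holder; the lambda itself cannot return it.
        firstLine.set(reader.readLine());
    }
});
// firstLine.get() is the first line of the content, or null for an empty stream.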

Aggregations

Types most often used together with InputStreamCallback across the indexed examples, with occurrence counts:

IOException (java.io.IOException): 80
InputStream (java.io.InputStream): 80
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 80
FlowFile (org.apache.nifi.flowfile.FlowFile): 62
ProcessException (org.apache.nifi.processor.exception.ProcessException): 35
ComponentLog (org.apache.nifi.logging.ComponentLog): 27
HashMap (java.util.HashMap): 25
AtomicReference (java.util.concurrent.atomic.AtomicReference): 23
OutputStream (java.io.OutputStream): 19
BufferedInputStream (java.io.BufferedInputStream): 18
ArrayList (java.util.ArrayList): 17
Map (java.util.Map): 17
OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback): 13
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 11
BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream): 10
StopWatch (org.apache.nifi.util.StopWatch): 10
HashSet (java.util.HashSet): 9
Charset (java.nio.charset.Charset): 8
FileInputStream (java.io.FileInputStream): 7
ProcessSession (org.apache.nifi.processor.ProcessSession): 7