Example 31 with OutputStreamCallback

use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

the class TestEvaluateJsonPath method testNullInput.

@Test
public void testNullInput() throws Exception {
    final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateJsonPath());
    testRunner.setProperty(EvaluateJsonPath.RETURN_TYPE, EvaluateJsonPath.RETURN_TYPE_JSON);
    testRunner.setProperty(EvaluateJsonPath.DESTINATION, EvaluateJsonPath.DESTINATION_ATTRIBUTE);
    testRunner.setProperty("stringField", "$.stringField");
    testRunner.setProperty("missingField", "$.missingField");
    testRunner.setProperty("nullField", "$.nullField");
    ProcessSession session = testRunner.getProcessSessionFactory().createSession();
    FlowFile ff = session.create();
    ff = session.write(ff, new OutputStreamCallback() {

        @Override
        public void process(OutputStream out) throws IOException {
            try (OutputStream outputStream = new BufferedOutputStream(out)) {
                outputStream.write("{\"stringField\": \"String Value\", \"nullField\": null}".getBytes(StandardCharsets.UTF_8));
            }
        }
    });
    testRunner.enqueue(ff);
    testRunner.run();
    testRunner.assertTransferCount(EvaluateJsonPath.REL_MATCH, 1);
    FlowFile output = testRunner.getFlowFilesForRelationship(EvaluateJsonPath.REL_MATCH).get(0);
    String validFieldValue = output.getAttribute("stringField");
    assertEquals("String Value", validFieldValue);
    String missingValue = output.getAttribute("missingField");
    assertEquals("Missing Value", "", missingValue);
    String nullValue = output.getAttribute("nullField");
    assertEquals("Null Value", "", nullValue);
}
Also used: ProcessSession(org.apache.nifi.processor.ProcessSession) FlowFile(org.apache.nifi.flowfile.FlowFile) MockFlowFile(org.apache.nifi.util.MockFlowFile) TestRunner(org.apache.nifi.util.TestRunner) OutputStream(java.io.OutputStream) BufferedOutputStream(org.apache.nifi.stream.io.BufferedOutputStream) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) Test(org.junit.Test)
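
The anonymous OutputStreamCallback above exists only to write a fixed JSON string into the test FlowFile. As a hedged alternative (not part of the original test), the same content can usually be enqueued directly on the TestRunner, which builds the FlowFile internally; a minimal sketch with the same EvaluateJsonPath properties, where the method name is illustrative:

@Test
public void testNullInputViaDirectEnqueue() throws Exception {
    final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateJsonPath());
    testRunner.setProperty(EvaluateJsonPath.RETURN_TYPE, EvaluateJsonPath.RETURN_TYPE_JSON);
    testRunner.setProperty(EvaluateJsonPath.DESTINATION, EvaluateJsonPath.DESTINATION_ATTRIBUTE);
    testRunner.setProperty("stringField", "$.stringField");
    testRunner.setProperty("nullField", "$.nullField");
    // enqueue the content directly; no explicit ProcessSession or OutputStreamCallback is needed
    testRunner.enqueue("{\"stringField\": \"String Value\", \"nullField\": null}".getBytes(StandardCharsets.UTF_8));
    testRunner.run();
    testRunner.assertTransferCount(EvaluateJsonPath.REL_MATCH, 1);
    final MockFlowFile output = testRunner.getFlowFilesForRelationship(EvaluateJsonPath.REL_MATCH).get(0);
    assertEquals("String Value", output.getAttribute("stringField"));
}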

Example 32 with OutputStreamCallback

use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

the class TestSplitJson method testSplit_pathToNullValue.

@Test
public void testSplit_pathToNullValue() throws Exception {
    final TestRunner testRunner = TestRunners.newTestRunner(new SplitJson());
    testRunner.setProperty(SplitJson.ARRAY_JSON_PATH_EXPRESSION, "$.nullField");
    ProcessSession session = testRunner.getProcessSessionFactory().createSession();
    FlowFile ff = session.create();
    ff = session.write(ff, new OutputStreamCallback() {

        @Override
        public void process(OutputStream out) throws IOException {
            try (OutputStream outputStream = new BufferedOutputStream(out)) {
                outputStream.write("{\"stringField\": \"String Value\", \"nullField\": null}".getBytes(StandardCharsets.UTF_8));
            }
        }
    });
    testRunner.enqueue(ff);
    testRunner.run();
    testRunner.assertTransferCount(SplitJson.REL_FAILURE, 1);
}
Also used: ProcessSession(org.apache.nifi.processor.ProcessSession) FlowFile(org.apache.nifi.flowfile.FlowFile) MockFlowFile(org.apache.nifi.util.MockFlowFile) TestRunner(org.apache.nifi.util.TestRunner) OutputStream(java.io.OutputStream) BufferedOutputStream(org.apache.nifi.stream.io.BufferedOutputStream) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) Test(org.junit.Test)
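
OutputStreamCallback declares a single process(OutputStream) method, so on Java 8+ the anonymous class in these tests can be replaced by a lambda. A sketch of the same test written that way (a paraphrase, not the project's code; the method name is illustrative):

@Test
public void testSplit_pathToNullValue_lambda() throws Exception {
    final TestRunner testRunner = TestRunners.newTestRunner(new SplitJson());
    testRunner.setProperty(SplitJson.ARRAY_JSON_PATH_EXPRESSION, "$.nullField");
    final ProcessSession session = testRunner.getProcessSessionFactory().createSession();
    FlowFile ff = session.create();
    // the lambda is the OutputStreamCallback; the BufferedOutputStream wrapper is omitted since a single write gains nothing from buffering
    ff = session.write(ff, out -> out.write("{\"stringField\": \"String Value\", \"nullField\": null}".getBytes(StandardCharsets.UTF_8)));
    testRunner.enqueue(ff);
    testRunner.run();
    testRunner.assertTransferCount(SplitJson.REL_FAILURE, 1);
}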

Example 33 with OutputStreamCallback

use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

the class GetHBase method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final String tableName = context.getProperty(TABLE_NAME).getValue();
    final String initialTimeRange = context.getProperty(INITIAL_TIMERANGE).getValue();
    final String filterExpression = context.getProperty(FILTER_EXPRESSION).getValue();
    final HBaseClientService hBaseClientService = context.getProperty(HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
    // if the table was changed then remove any previous state
    if (previousTable != null && !tableName.equals(previousTable)) {
        try {
            context.getStateManager().clear(Scope.CLUSTER);
        } catch (final IOException ioe) {
            getLogger().warn("Failed to clear Cluster State", ioe);
        }
        previousTable = tableName;
    }
    try {
        final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
        final RowSerializer serializer = new JsonRowSerializer(charset);
        this.lastResult = getState(context.getStateManager());
        final long defaultMinTime = (initialTimeRange.equals(NONE.getValue()) ? 0L : System.currentTimeMillis());
        final long minTime = (lastResult == null ? defaultMinTime : lastResult.getTimestamp());
        final Map<String, Set<String>> cellsMatchingTimestamp = new HashMap<>();
        final AtomicReference<Long> rowsPulledHolder = new AtomicReference<>(0L);
        final AtomicReference<Long> latestTimestampHolder = new AtomicReference<>(minTime);
        hBaseClientService.scan(tableName, columns, filterExpression, minTime, new ResultHandler() {

            @Override
            public void handle(final byte[] rowKey, final ResultCell[] resultCells) {
                final String rowKeyString = new String(rowKey, StandardCharsets.UTF_8);
                // check if latest cell timestamp is equal to our cutoff.
                // if any of the cells have a timestamp later than our cutoff, then we
                // want the row. But if the cell with the latest timestamp is equal to
                // our cutoff, then we want to check if that's one of the cells that
                // we have already seen.
                long latestCellTimestamp = 0L;
                for (final ResultCell cell : resultCells) {
                    if (cell.getTimestamp() > latestCellTimestamp) {
                        latestCellTimestamp = cell.getTimestamp();
                    }
                }
                // if the latest cell timestamp is before our cutoff, we've already seen this row and can skip it.
                if (latestCellTimestamp < minTime) {
                    getLogger().debug("latest cell timestamp for row {} is {}, which is earlier than the minimum time of {}", new Object[] { rowKeyString, latestCellTimestamp, minTime });
                    return;
                }
                if (latestCellTimestamp == minTime) {
                    // latest cell timestamp is equal to our minimum time. Check if all cells that have
                    // that timestamp are in our list of previously seen cells.
                    boolean allSeen = true;
                    for (final ResultCell cell : resultCells) {
                        if (cell.getTimestamp() == latestCellTimestamp) {
                            if (lastResult == null || !lastResult.contains(cell)) {
                                allSeen = false;
                                break;
                            }
                        }
                    }
                    if (allSeen) {
                        // we have already seen all of the cells for this row. We do not want to
                        // include this cell in our output.
                        getLogger().debug("all cells for row {} have already been seen", new Object[] { rowKeyString });
                        return;
                    }
                }
                // record the cells that carry the latest timestamp seen so far; rows with older timestamps can be ignored for this bookkeeping.
                if (latestCellTimestamp >= latestTimestampHolder.get()) {
                    // new timestamp, so clear all of the 'matching cells'
                    if (latestCellTimestamp > latestTimestampHolder.get()) {
                        latestTimestampHolder.set(latestCellTimestamp);
                        cellsMatchingTimestamp.clear();
                    }
                    for (final ResultCell cell : resultCells) {
                        final long ts = cell.getTimestamp();
                        if (ts == latestCellTimestamp) {
                            final byte[] rowValue = Arrays.copyOfRange(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength() + cell.getRowOffset());
                            final byte[] cellValue = Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength() + cell.getValueOffset());
                            final String rowHash = new String(rowValue, StandardCharsets.UTF_8);
                            Set<String> cellHashes = cellsMatchingTimestamp.get(rowHash);
                            if (cellHashes == null) {
                                cellHashes = new HashSet<>();
                                cellsMatchingTimestamp.put(rowHash, cellHashes);
                            }
                            cellHashes.add(new String(cellValue, StandardCharsets.UTF_8));
                        }
                    }
                }
                // write the row to a new FlowFile.
                FlowFile flowFile = session.create();
                flowFile = session.write(flowFile, new OutputStreamCallback() {

                    @Override
                    public void process(final OutputStream out) throws IOException {
                        serializer.serialize(rowKey, resultCells, out);
                    }
                });
                final Map<String, String> attributes = new HashMap<>();
                attributes.put("hbase.table", tableName);
                attributes.put("mime.type", "application/json");
                flowFile = session.putAllAttributes(flowFile, attributes);
                session.getProvenanceReporter().receive(flowFile, hBaseClientService.toTransitUri(tableName, rowKeyString));
                session.transfer(flowFile, REL_SUCCESS);
                getLogger().debug("Received {} from HBase with row key {}", new Object[] { flowFile, rowKeyString });
                // we could potentially have a huge number of rows. If we get to 500, go ahead and commit the
                // session so that we can avoid buffering tons of FlowFiles without ever sending any out.
                long rowsPulled = rowsPulledHolder.get();
                rowsPulledHolder.set(++rowsPulled);
                if (rowsPulled % getBatchSize() == 0) {
                    session.commit();
                }
            }
        });
        final ScanResult scanResults = new ScanResult(latestTimestampHolder.get(), cellsMatchingTimestamp);
        // Commit session before we replace the lastResult; if session commit fails, we want
        // to pull these records again.
        session.commit();
        if (lastResult == null || scanResults.getTimestamp() > lastResult.getTimestamp()) {
            lastResult = scanResults;
        } else if (scanResults.getTimestamp() == lastResult.getTimestamp()) {
            final Map<String, Set<String>> combinedResults = new HashMap<>(scanResults.getMatchingCells());
            // do a deep copy because the Set may be modified below.
            for (final Map.Entry<String, Set<String>> entry : scanResults.getMatchingCells().entrySet()) {
                combinedResults.put(entry.getKey(), new HashSet<>(entry.getValue()));
            }
            // combine the results from 'lastResult'
            for (final Map.Entry<String, Set<String>> entry : lastResult.getMatchingCells().entrySet()) {
                final Set<String> existing = combinedResults.get(entry.getKey());
                if (existing == null) {
                    combinedResults.put(entry.getKey(), new HashSet<>(entry.getValue()));
                } else {
                    existing.addAll(entry.getValue());
                }
            }
            final ScanResult scanResult = new ScanResult(scanResults.getTimestamp(), combinedResults);
            lastResult = scanResult;
        }
        // save state using the framework's state manager
        storeState(lastResult, context.getStateManager());
    } catch (final IOException e) {
        getLogger().error("Failed to receive data from HBase due to {}", e);
        session.rollback();
    } finally {
        // if we failed, we want to yield so that we don't hammer hbase. If we succeed, then we have
        // pulled all of the records, so we want to wait a bit before hitting hbase again anyway.
        context.yield();
    }
}
Also used: Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) OutputStream(java.io.OutputStream) ResultHandler(org.apache.nifi.hbase.scan.ResultHandler) ResultCell(org.apache.nifi.hbase.scan.ResultCell) JsonRowSerializer(org.apache.nifi.hbase.io.JsonRowSerializer) RowSerializer(org.apache.nifi.hbase.io.RowSerializer) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) FlowFile(org.apache.nifi.flowfile.FlowFile) Charset(java.nio.charset.Charset) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) Map(java.util.Map) StateMap(org.apache.nifi.components.state.StateMap)
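
Separate from the OutputStreamCallback itself, the trickiest part of this example is the merge near the end, where matching cells from the current scan and from lastResult are combined when their timestamps are equal. A small hedged sketch of that merge in isolation, using the same Map<String, Set<String>> shape (class and method names are illustrative):

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

final class MatchingCellMerger {

    // Deep-copies 'current' and folds 'previous' into it, mirroring the combine step in onTrigger above.
    static Map<String, Set<String>> merge(final Map<String, Set<String>> current, final Map<String, Set<String>> previous) {
        final Map<String, Set<String>> combined = new HashMap<>();
        // deep copy so the sets held by 'current' are never modified
        current.forEach((row, cells) -> combined.put(row, new HashSet<>(cells)));
        // fold in the previously seen cells row by row
        previous.forEach((row, cells) -> combined.computeIfAbsent(row, k -> new HashSet<>()).addAll(cells));
        return combined;
    }
}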

Example 34 with OutputStreamCallback

use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

the class GetKafka method consumeFromKafka.

private void consumeFromKafka(final ProcessContext context, final ProcessSession session, ConsumerIterator<byte[], byte[]> iterator) throws ProcessException {
    final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
    final String demarcator = context.getProperty(MESSAGE_DEMARCATOR).getValue().replace("\\n", "\n").replace("\\r", "\r").replace("\\t", "\t");
    final byte[] demarcatorBytes = demarcator.getBytes(StandardCharsets.UTF_8);
    final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions().getValue();
    FlowFile flowFile = session.create();
    final Map<String, String> attributes = new HashMap<>();
    attributes.put("kafka.topic", topic);
    final long start = System.nanoTime();
    int msgCount = 0;
    try {
        for (; msgCount < batchSize && iterator.hasNext(); msgCount++) {
            final MessageAndMetadata<byte[], byte[]> mam = iterator.next();
            if (batchSize == 1) {
                final byte[] key = mam.key();
                // the kafka.key, kafka.offset, and kafka.partition attributes are only added for a batch size of 1.
                if (key != null) {
                    attributes.put("kafka.key", new String(key, StandardCharsets.UTF_8));
                }
                attributes.put("kafka.offset", String.valueOf(mam.offset()));
                attributes.put("kafka.partition", String.valueOf(mam.partition()));
            }
            // add the message to the FlowFile's contents
            final boolean firstMessage = msgCount == 0;
            flowFile = session.append(flowFile, new OutputStreamCallback() {

                @Override
                public void process(final OutputStream out) throws IOException {
                    if (!firstMessage) {
                        out.write(demarcatorBytes);
                    }
                    out.write(mam.message());
                }
            });
        }
        this.releaseFlowFile(flowFile, session, attributes, start, topic, msgCount);
    } catch (ConsumerTimeoutException e) {
        /*
             * By default Kafka blocks indefinitely if topic is empty via
             * stream.hasNext(). If 'consumer.timeout.ms' property is set (see
             * http://kafka.apache.org/documentation.html#configuration) the
             * hasNext() will fail with this exception. To this processor it
             * simply means there are no messages and current task should exit
             * in non-failure releasing the flow file if it was able to
             * accumulate any events.
             */
        this.releaseFlowFile(flowFile, session, attributes, start, topic, msgCount);
    } catch (final Exception e) {
        this.shutdownConsumer();
        getLogger().error("Failed to receive FlowFile from Kafka due to {}", new Object[] { e });
        if (flowFile != null) {
            session.remove(flowFile);
        }
    } finally {
        // Add the iterator back to the queue
        if (iterator != null) {
            streamIterators.offer(iterator);
        }
    }
}
Also used: FlowFile(org.apache.nifi.flowfile.FlowFile) HashMap(java.util.HashMap) OutputStream(java.io.OutputStream) ConsumerTimeoutException(kafka.consumer.ConsumerTimeoutException) TimeoutException(java.util.concurrent.TimeoutException) ProcessException(org.apache.nifi.processor.exception.ProcessException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback)
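
The OutputStreamCallback here is passed to session.append once per message, and it writes the demarcator only when the message is not the first in the batch. A self-contained sketch of that demarcated-append pattern (the class, method, and message list are illustrative, not part of GetKafka):

import java.nio.charset.StandardCharsets;
import java.util.List;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;

final class DemarcatedAppend {

    // Appends each message to the FlowFile, writing the demarcator before every message except the first.
    static FlowFile appendAll(final ProcessSession session, FlowFile flowFile, final List<byte[]> messages, final String demarcator) {
        final byte[] demarcatorBytes = demarcator.getBytes(StandardCharsets.UTF_8);
        for (int i = 0; i < messages.size(); i++) {
            final boolean firstMessage = i == 0;
            final byte[] message = messages.get(i);
            flowFile = session.append(flowFile, out -> {
                if (!firstMessage) {
                    out.write(demarcatorBytes);
                }
                out.write(message);
            });
        }
        return flowFile;
    }
}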

Example 35 with OutputStreamCallback

use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

the class AbstractFlumeProcessor method transferEvent.

protected static void transferEvent(final Event event, ProcessSession session, Relationship relationship) {
    FlowFile flowFile = session.create();
    flowFile = session.putAllAttributes(flowFile, event.getHeaders());
    flowFile = session.write(flowFile, new OutputStreamCallback() {

        @Override
        public void process(final OutputStream out) throws IOException {
            out.write(event.getBody());
        }
    });
    session.getProvenanceReporter().create(flowFile);
    session.transfer(flowFile, relationship);
}
Also used: FlowFile(org.apache.nifi.flowfile.FlowFile) OutputStream(java.io.OutputStream) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback)
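
As a hedged sketch (not part of AbstractFlumeProcessor), the same create, putAllAttributes, write, provenance, transfer sequence generalizes to any byte[] payload; the class and method names below are illustrative:

import java.util.Map;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;

final class PayloadTransfer {

    // Minimal create -> attributes -> write -> provenance -> transfer sequence, mirroring transferEvent above.
    static void transfer(final ProcessSession session, final Relationship relationship, final Map<String, String> attributes, final byte[] payload) {
        FlowFile flowFile = session.create();
        flowFile = session.putAllAttributes(flowFile, attributes);
        // the lambda is the OutputStreamCallback that writes the payload as the FlowFile content
        flowFile = session.write(flowFile, out -> out.write(payload));
        session.getProvenanceReporter().create(flowFile);
        session.transfer(flowFile, relationship);
    }
}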

Aggregations

FlowFile (org.apache.nifi.flowfile.FlowFile): 70
OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback): 70
OutputStream (java.io.OutputStream): 69
IOException (java.io.IOException): 39
ProcessException (org.apache.nifi.processor.exception.ProcessException): 27
HashMap (java.util.HashMap): 25
InputStream (java.io.InputStream): 24
Test (org.junit.Test): 24
MockFlowFile (org.apache.nifi.util.MockFlowFile): 23
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 20
ComponentLog (org.apache.nifi.logging.ComponentLog): 17
FileOutputStream (java.io.FileOutputStream): 16
FilterOutputStream (java.io.FilterOutputStream): 16
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 14
ArrayList (java.util.ArrayList): 12
Map (java.util.Map): 12
ProcessSession (org.apache.nifi.processor.ProcessSession): 12
BufferedOutputStream (org.apache.nifi.stream.io.BufferedOutputStream): 10
AtomicReference (java.util.concurrent.atomic.AtomicReference): 9
StandardContentClaim (org.apache.nifi.controller.repository.claim.StandardContentClaim): 9