
Example 66 with OutputStreamCallback

Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

The class ValidateCsv, method onTrigger. In line-by-line validation mode it uses OutputStreamCallback with session.append to copy each valid or invalid row into one of two result FlowFiles; in whole-file mode it routes the original FlowFile as a unit.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final CsvPreference csvPref = getPreference(context, flowFile);
    final boolean header = context.getProperty(HEADER).asBoolean();
    final ComponentLog logger = getLogger();
    final String schema = context.getProperty(SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
    final CellProcessor[] cellProcs = this.parseSchema(schema);
    final boolean isWholeFFValidation = context.getProperty(VALIDATION_STRATEGY).getValue().equals(VALIDATE_WHOLE_FLOWFILE.getValue());
    final AtomicReference<Boolean> valid = new AtomicReference<Boolean>(true);
    final AtomicReference<Boolean> isFirstLineValid = new AtomicReference<Boolean>(true);
    final AtomicReference<Boolean> isFirstLineInvalid = new AtomicReference<Boolean>(true);
    final AtomicReference<Integer> okCount = new AtomicReference<Integer>(0);
    final AtomicReference<Integer> totalCount = new AtomicReference<Integer>(0);
    final AtomicReference<FlowFile> invalidFF = new AtomicReference<FlowFile>(null);
    final AtomicReference<FlowFile> validFF = new AtomicReference<FlowFile>(null);
    if (!isWholeFFValidation) {
        invalidFF.set(session.create(flowFile));
        validFF.set(session.create(flowFile));
    }
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream in) throws IOException {
            NifiCsvListReader listReader = null;
            try {
                listReader = new NifiCsvListReader(new InputStreamReader(in), csvPref);
                // if a header line is expected, consume it first; in line-by-line mode,
                // copy it to both the 'valid' and 'invalid' output FlowFiles
                if (header) {
                    List<String> headerList = listReader.read();
                    if (!isWholeFFValidation) {
                        invalidFF.set(session.append(invalidFF.get(), new OutputStreamCallback() {

                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(print(headerList, csvPref, isFirstLineInvalid.get()));
                            }
                        }));
                        validFF.set(session.append(validFF.get(), new OutputStreamCallback() {

                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(print(headerList, csvPref, isFirstLineValid.get()));
                            }
                        }));
                        isFirstLineValid.set(false);
                        isFirstLineInvalid.set(false);
                    }
                }
                boolean stop = false;
                while (!stop) {
                    try {
                        final List<Object> list = listReader.read(cellProcs);
                        stop = list == null;
                        if (!isWholeFFValidation && !stop) {
                            validFF.set(session.append(validFF.get(), new OutputStreamCallback() {

                                @Override
                                public void process(OutputStream out) throws IOException {
                                    out.write(print(list, csvPref, isFirstLineValid.get()));
                                }
                            }));
                            okCount.set(okCount.get() + 1);
                            if (isFirstLineValid.get()) {
                                isFirstLineValid.set(false);
                            }
                        }
                    } catch (final SuperCsvException e) {
                        valid.set(false);
                        if (isWholeFFValidation) {
                            logger.debug("Failed to validate {} against schema due to {}; routing to 'invalid'", new Object[] { flowFile }, e);
                            break;
                        } else {
                        // append the invalid line to the FlowFile that will be routed to the 'invalid' relationship
                            invalidFF.set(session.append(invalidFF.get(), new OutputStreamCallback() {

                                @Override
                                public void process(OutputStream out) throws IOException {
                                    out.write(print(e.getCsvContext().getRowSource(), csvPref, isFirstLineInvalid.get()));
                                }
                            }));
                            if (isFirstLineInvalid.get()) {
                                isFirstLineInvalid.set(false);
                            }
                        }
                    } finally {
                        if (!isWholeFFValidation) {
                            totalCount.set(totalCount.get() + 1);
                        }
                    }
                }
            } catch (final IOException e) {
                valid.set(false);
                logger.error("Failed to validate {} against schema due to {}", new Object[] { flowFile }, e);
            } finally {
                if (listReader != null) {
                    listReader.close();
                }
            }
        }
    });
    if (isWholeFFValidation) {
        if (valid.get()) {
            logger.debug("Successfully validated {} against schema; routing to 'valid'", new Object[] { flowFile });
            session.getProvenanceReporter().route(flowFile, REL_VALID);
            session.transfer(flowFile, REL_VALID);
        } else {
            session.getProvenanceReporter().route(flowFile, REL_INVALID);
            session.transfer(flowFile, REL_INVALID);
        }
    } else {
        if (valid.get()) {
            logger.debug("Successfully validated {} against schema; routing to 'valid'", new Object[] { validFF.get() });
            session.getProvenanceReporter().route(validFF.get(), REL_VALID, "All " + totalCount.get() + " line(s) are valid");
            session.putAttribute(validFF.get(), "count.valid.lines", Integer.toString(totalCount.get()));
            session.putAttribute(validFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
            session.transfer(validFF.get(), REL_VALID);
            session.remove(invalidFF.get());
            session.remove(flowFile);
        } else if (okCount.get() != 0) {
            // the finally block in the while loop also counted the terminating null read, so back out one line
            totalCount.set(totalCount.get() - 1);
            logger.debug("Successfully validated {}/{} line(s) in {} against schema; routing valid lines to 'valid' and invalid lines to 'invalid'", new Object[] { okCount.get(), totalCount.get(), flowFile });
            session.getProvenanceReporter().route(validFF.get(), REL_VALID, okCount.get() + " valid line(s)");
            session.putAttribute(validFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
            session.putAttribute(validFF.get(), "count.valid.lines", Integer.toString(okCount.get()));
            session.transfer(validFF.get(), REL_VALID);
            session.getProvenanceReporter().route(invalidFF.get(), REL_INVALID, (totalCount.get() - okCount.get()) + " invalid line(s)");
            session.putAttribute(invalidFF.get(), "count.invalid.lines", Integer.toString((totalCount.get() - okCount.get())));
            session.putAttribute(invalidFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
            session.transfer(invalidFF.get(), REL_INVALID);
            session.remove(flowFile);
        } else {
            logger.debug("All lines in {} are invalid; routing to 'invalid'", new Object[] { invalidFF.get() });
            session.getProvenanceReporter().route(invalidFF.get(), REL_INVALID, "All " + totalCount.get() + " line(s) are invalid");
            session.putAttribute(invalidFF.get(), "count.invalid.lines", Integer.toString(totalCount.get()));
            session.putAttribute(invalidFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
            session.transfer(invalidFF.get(), REL_INVALID);
            session.remove(validFF.get());
            session.remove(flowFile);
        }
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), InputStreamReader (java.io.InputStreamReader), InputStream (java.io.InputStream), OutputStream (java.io.OutputStream), AtomicReference (java.util.concurrent.atomic.AtomicReference), IOException (java.io.IOException), ComponentLog (org.apache.nifi.logging.ComponentLog), SuperCsvException (org.supercsv.exception.SuperCsvException), CsvPreference (org.supercsv.prefs.CsvPreference), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), CellProcessor (org.supercsv.cellprocessor.ift.CellProcessor), List (java.util.List), ArrayList (java.util.ArrayList), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback)
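
Since OutputStreamCallback declares a single method, process(OutputStream), the anonymous classes above can be written as lambdas on Java 8+. Below is a minimal, self-contained sketch of the same session.write pattern; the processor class, relationship, and payload are illustrative and not taken from the NiFi sources.

import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Set;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;

// Hypothetical processor used only to illustrate the lambda form of
// session.write(FlowFile, OutputStreamCallback)
public class WriteGreetingProcessor extends AbstractProcessor {

    static final Relationship REL_SUCCESS = new Relationship.Builder().name("success").description("FlowFiles whose content was rewritten").build();

    @Override
    public Set<Relationship> getRelationships() {
        return Collections.singleton(REL_SUCCESS);
    }

    @Override
    public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
        FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }
        // the lambda body is OutputStreamCallback#process(OutputStream); session.write
        // returns the updated FlowFile, which must be used from here on
        flowFile = session.write(flowFile, out -> out.write("hello".getBytes(StandardCharsets.UTF_8)));
        session.transfer(flowFile, REL_SUCCESS);
    }
}

The same lambda shorthand applies to the session.append calls in Example 66, since append also takes an OutputStreamCallback.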

Example 67 with OutputStreamCallback

Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

The class TestSplitJson, method testSplit_pathToArrayWithNulls_nullStringRepresentation. The test uses OutputStreamCallback with session.write to seed the incoming FlowFile's JSON content, then asserts that each null array element is split into a FlowFile containing the string "null".

@Test
public void testSplit_pathToArrayWithNulls_nullStringRepresentation() throws Exception {
    final TestRunner testRunner = TestRunners.newTestRunner(new SplitJson());
    testRunner.setProperty(SplitJson.ARRAY_JSON_PATH_EXPRESSION, "$.arrayOfNulls");
    testRunner.setProperty(SplitJson.NULL_VALUE_DEFAULT_REPRESENTATION, AbstractJsonPathProcessor.NULL_STRING_OPTION);
    ProcessSession session = testRunner.getProcessSessionFactory().createSession();
    FlowFile ff = session.create();
    ff = session.write(ff, new OutputStreamCallback() {

        @Override
        public void process(OutputStream out) throws IOException {
            try (OutputStream outputStream = new BufferedOutputStream(out)) {
                outputStream.write("{\"stringField\": \"String Value\", \"arrayOfNulls\": [null, null, null]}".getBytes(StandardCharsets.UTF_8));
            }
        }
    });
    testRunner.enqueue(ff);
    testRunner.run();
    /* assert that three files were transferred to split and each has the word null in it */
    int expectedFiles = 3;
    testRunner.assertTransferCount(SplitJson.REL_SPLIT, expectedFiles);
    for (int i = 0; i < expectedFiles; i++) {
        testRunner.getFlowFilesForRelationship(SplitJson.REL_SPLIT).get(i).assertContentEquals("null");
    }
}
Also used: ProcessSession (org.apache.nifi.processor.ProcessSession), FlowFile (org.apache.nifi.flowfile.FlowFile), MockFlowFile (org.apache.nifi.util.MockFlowFile), TestRunner (org.apache.nifi.util.TestRunner), OutputStream (java.io.OutputStream), BufferedOutputStream (org.apache.nifi.stream.io.BufferedOutputStream), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback), Test (org.junit.Test)

Example 68 with OutputStreamCallback

Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

The class TestSplitJson, method testSplit_pathToArrayWithNulls_emptyStringRepresentation. Identical setup to the previous test, but with the default null representation, so each null array element is split into an empty FlowFile.

@Test
public void testSplit_pathToArrayWithNulls_emptyStringRepresentation() throws Exception {
    final TestRunner testRunner = TestRunners.newTestRunner(new SplitJson());
    testRunner.setProperty(SplitJson.ARRAY_JSON_PATH_EXPRESSION, "$.arrayOfNulls");
    ProcessSession session = testRunner.getProcessSessionFactory().createSession();
    FlowFile ff = session.create();
    ff = session.write(ff, new OutputStreamCallback() {

        @Override
        public void process(OutputStream out) throws IOException {
            try (OutputStream outputStream = new BufferedOutputStream(out)) {
                outputStream.write("{\"stringField\": \"String Value\", \"arrayOfNulls\": [null, null, null]}".getBytes(StandardCharsets.UTF_8));
            }
        }
    });
    testRunner.enqueue(ff);
    testRunner.run();
    /* assert that three files were transferred to split and each is empty */
    int expectedFiles = 3;
    testRunner.assertTransferCount(SplitJson.REL_SPLIT, expectedFiles);
    for (int i = 0; i < expectedFiles; i++) {
        testRunner.getFlowFilesForRelationship(SplitJson.REL_SPLIT).get(i).assertContentEquals("");
    }
}
Also used: ProcessSession (org.apache.nifi.processor.ProcessSession), FlowFile (org.apache.nifi.flowfile.FlowFile), MockFlowFile (org.apache.nifi.util.MockFlowFile), TestRunner (org.apache.nifi.util.TestRunner), OutputStream (java.io.OutputStream), BufferedOutputStream (org.apache.nifi.stream.io.BufferedOutputStream), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback), Test (org.junit.Test)
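
Both tests above create a ProcessSession by hand only to seed the FlowFile's content. When the content is a fixed byte sequence, TestRunner's enqueue(byte[]) overload creates the FlowFile for you; a minimal sketch under the same SplitJson setup (the test class name is illustrative):

import java.nio.charset.StandardCharsets;

import org.apache.nifi.processors.standard.SplitJson;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Test;

public class SplitJsonEnqueueSketch {

    @Test
    public void testSplit_directEnqueue() {
        final TestRunner testRunner = TestRunners.newTestRunner(new SplitJson());
        testRunner.setProperty(SplitJson.ARRAY_JSON_PATH_EXPRESSION, "$.arrayOfNulls");
        // enqueue(byte[]) wraps the bytes in a new FlowFile; no explicit ProcessSession
        // or OutputStreamCallback is needed for static content
        testRunner.enqueue("{\"stringField\": \"String Value\", \"arrayOfNulls\": [null, null, null]}".getBytes(StandardCharsets.UTF_8));
        testRunner.run();
        testRunner.assertTransferCount(SplitJson.REL_SPLIT, 3);
    }
}

The session.write form shown in the examples above remains useful when test content must be generated programmatically at write time.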

Example 69 with OutputStreamCallback

Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

The class WriteResourceToStream, method onTrigger. This is the simplest pattern in the set: a single session.write call that replaces the FlowFile's content with previously loaded resource data.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    try {
        flowFile = session.write(flowFile, new OutputStreamCallback() {

            @Override
            public void process(OutputStream out) throws IOException {
                IOUtils.write(resourceData, out);
            }
        });
        session.transfer(flowFile, REL_SUCCESS);
    } catch (ProcessException ex) {
        getLogger().error("Unable to process", ex);
        session.transfer(flowFile, REL_FAILURE);
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), ProcessException (org.apache.nifi.processor.exception.ProcessException), OutputStream (java.io.OutputStream), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback)
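
One detail worth flagging: if resourceData is a String, the two-argument IOUtils.write overload encodes it with the JVM's default charset. Commons IO also provides IOUtils.write(String, OutputStream, Charset), so passing StandardCharsets.UTF_8 explicitly would make the written FlowFile content deterministic across platforms.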

Example 70 with OutputStreamCallback

Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.

The class GetHDFSEvents, method onTrigger. For each HDFS inotify event that passes the filter, it creates a FlowFile and uses OutputStreamCallback with session.write to fill it with the event serialized as JSON.

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final StateManager stateManager = context.getStateManager();
    try {
        StateMap state = stateManager.getState(Scope.CLUSTER);
        String txIdAsString = state.get(LAST_TX_ID);
        if (txIdAsString != null && !"".equals(txIdAsString)) {
            lastTxId = Long.parseLong(txIdAsString);
        }
    } catch (IOException e) {
        getLogger().error("Unable to retrieve last transaction ID. Must retrieve last processed transaction ID before processing can occur.", e);
        context.yield();
        return;
    }
    try {
        final int retries = context.getProperty(NUMBER_OF_RETRIES_FOR_POLL).asInteger();
        final TimeUnit pollDurationTimeUnit = TimeUnit.MICROSECONDS;
        final long pollDuration = context.getProperty(POLL_DURATION).asTimePeriod(pollDurationTimeUnit);
        final DFSInotifyEventInputStream eventStream = lastTxId == -1L ? getHdfsAdmin().getInotifyEventStream() : getHdfsAdmin().getInotifyEventStream(lastTxId);
        final EventBatch eventBatch = getEventBatch(eventStream, pollDuration, pollDurationTimeUnit, retries);
        if (eventBatch != null && eventBatch.getEvents() != null) {
            if (eventBatch.getEvents().length > 0) {
                List<FlowFile> flowFiles = new ArrayList<>(eventBatch.getEvents().length);
                for (Event e : eventBatch.getEvents()) {
                    if (toProcessEvent(context, e)) {
                        getLogger().debug("Creating flow file for event: {}.", new Object[] { e });
                        final String path = getPath(e);
                        FlowFile flowFile = session.create();
                        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_TYPE, e.getEventType().name());
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_PATH, path);
                        flowFile = session.write(flowFile, new OutputStreamCallback() {

                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(OBJECT_MAPPER.writeValueAsBytes(e));
                            }
                        });
                        flowFiles.add(flowFile);
                    }
                }
                for (FlowFile flowFile : flowFiles) {
                    final String path = flowFile.getAttribute(EventAttributes.EVENT_PATH);
                    final String transitUri = path.startsWith("/") ? "hdfs:/" + path : "hdfs://" + path;
                    getLogger().debug("Transferring flow file {} and creating provenance event with URI {}.", new Object[] { flowFile, transitUri });
                    session.transfer(flowFile, REL_SUCCESS);
                    session.getProvenanceReporter().receive(flowFile, transitUri);
                }
            }
            lastTxId = eventBatch.getTxid();
        }
    } catch (IOException | InterruptedException e) {
        getLogger().error("Unable to get notification information: {}", new Object[] { e });
        context.yield();
        return;
    } catch (MissingEventsException e) {
        // set lastTxId to -1 and update state. This may cause events not to be processed. The reason this exception is thrown is described in the
        // org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream API. It suggests tuning a couple of parameters if this API is used.
        lastTxId = -1L;
        getLogger().error("Unable to get notification information. Setting transaction id to -1. This may cause some events to get missed. " + "Please see javadoc for org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream: {}", new Object[] { e });
    }
    updateClusterStateForTxId(stateManager);
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), StateMap (org.apache.nifi.components.state.StateMap), OutputStream (java.io.OutputStream), ArrayList (java.util.ArrayList), IOException (java.io.IOException), MissingEventsException (org.apache.hadoop.hdfs.inotify.MissingEventsException), StateManager (org.apache.nifi.components.state.StateManager), TimeUnit (java.util.concurrent.TimeUnit), Event (org.apache.hadoop.hdfs.inotify.Event), DFSInotifyEventInputStream (org.apache.hadoop.hdfs.DFSInotifyEventInputStream), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback), EventBatch (org.apache.hadoop.hdfs.inotify.EventBatch)
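
A small note on the write callback here: OBJECT_MAPPER.writeValueAsBytes(e) serializes each event into an intermediate byte array before writing. Jackson's ObjectMapper also offers writeValue(OutputStream, Object), which streams the JSON straight into the callback's OutputStream; for small inotify events the difference is negligible, but the streaming form avoids the extra copy.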

Aggregations

FlowFile (org.apache.nifi.flowfile.FlowFile): 70 usages
OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback): 70 usages
OutputStream (java.io.OutputStream): 69 usages
IOException (java.io.IOException): 39 usages
ProcessException (org.apache.nifi.processor.exception.ProcessException): 27 usages
HashMap (java.util.HashMap): 25 usages
InputStream (java.io.InputStream): 24 usages
Test (org.junit.Test): 24 usages
MockFlowFile (org.apache.nifi.util.MockFlowFile): 23 usages
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 20 usages
ComponentLog (org.apache.nifi.logging.ComponentLog): 17 usages
FileOutputStream (java.io.FileOutputStream): 16 usages
FilterOutputStream (java.io.FilterOutputStream): 16 usages
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 14 usages
ArrayList (java.util.ArrayList): 12 usages
Map (java.util.Map): 12 usages
ProcessSession (org.apache.nifi.processor.ProcessSession): 12 usages
BufferedOutputStream (org.apache.nifi.stream.io.BufferedOutputStream): 10 usages
AtomicReference (java.util.concurrent.atomic.AtomicReference): 9 usages
StandardContentClaim (org.apache.nifi.controller.repository.claim.StandardContentClaim): 9 usages