Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
From the class ValidateCsv, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final CsvPreference csvPref = getPreference(context, flowFile);
    final boolean header = context.getProperty(HEADER).asBoolean();
    final ComponentLog logger = getLogger();
    final String schema = context.getProperty(SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
    final CellProcessor[] cellProcs = this.parseSchema(schema);
    final boolean isWholeFFValidation = context.getProperty(VALIDATION_STRATEGY).getValue().equals(VALIDATE_WHOLE_FLOWFILE.getValue());
    final AtomicReference<Boolean> valid = new AtomicReference<Boolean>(true);
    final AtomicReference<Boolean> isFirstLineValid = new AtomicReference<Boolean>(true);
    final AtomicReference<Boolean> isFirstLineInvalid = new AtomicReference<Boolean>(true);
    final AtomicReference<Integer> okCount = new AtomicReference<Integer>(0);
    final AtomicReference<Integer> totalCount = new AtomicReference<Integer>(0);
    final AtomicReference<FlowFile> invalidFF = new AtomicReference<FlowFile>(null);
    final AtomicReference<FlowFile> validFF = new AtomicReference<FlowFile>(null);
    if (!isWholeFFValidation) {
        invalidFF.set(session.create(flowFile));
        validFF.set(session.create(flowFile));
    }
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            NifiCsvListReader listReader = null;
            try {
                listReader = new NifiCsvListReader(new InputStreamReader(in), csvPref);
                // handling of header
                if (header) {
                    List<String> headerList = listReader.read();
                    if (!isWholeFFValidation) {
                        invalidFF.set(session.append(invalidFF.get(), new OutputStreamCallback() {
                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(print(headerList, csvPref, isFirstLineInvalid.get()));
                            }
                        }));
                        validFF.set(session.append(validFF.get(), new OutputStreamCallback() {
                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(print(headerList, csvPref, isFirstLineValid.get()));
                            }
                        }));
                        isFirstLineValid.set(false);
                        isFirstLineInvalid.set(false);
                    }
                }
                boolean stop = false;
                while (!stop) {
                    try {
                        final List<Object> list = listReader.read(cellProcs);
                        stop = list == null;
                        if (!isWholeFFValidation && !stop) {
                            validFF.set(session.append(validFF.get(), new OutputStreamCallback() {
                                @Override
                                public void process(OutputStream out) throws IOException {
                                    out.write(print(list, csvPref, isFirstLineValid.get()));
                                }
                            }));
                            okCount.set(okCount.get() + 1);
                            if (isFirstLineValid.get()) {
                                isFirstLineValid.set(false);
                            }
                        }
                    } catch (final SuperCsvException e) {
                        valid.set(false);
                        if (isWholeFFValidation) {
                            logger.debug("Failed to validate {} against schema due to {}; routing to 'invalid'", new Object[] { flowFile }, e);
                            break;
                        } else {
                            // we append the invalid line to the flow file that will be routed to the invalid relationship
                            invalidFF.set(session.append(invalidFF.get(), new OutputStreamCallback() {
                                @Override
                                public void process(OutputStream out) throws IOException {
                                    out.write(print(e.getCsvContext().getRowSource(), csvPref, isFirstLineInvalid.get()));
                                }
                            }));
                            if (isFirstLineInvalid.get()) {
                                isFirstLineInvalid.set(false);
                            }
                        }
                    } finally {
                        if (!isWholeFFValidation) {
                            totalCount.set(totalCount.get() + 1);
                        }
                    }
                }
            } catch (final IOException e) {
                valid.set(false);
                logger.error("Failed to validate {} against schema due to {}", new Object[] { flowFile }, e);
            } finally {
                if (listReader != null) {
                    listReader.close();
                }
            }
        }
    });
    if (isWholeFFValidation) {
        if (valid.get()) {
            logger.debug("Successfully validated {} against schema; routing to 'valid'", new Object[] { flowFile });
            session.getProvenanceReporter().route(flowFile, REL_VALID);
            session.transfer(flowFile, REL_VALID);
        } else {
            session.getProvenanceReporter().route(flowFile, REL_INVALID);
            session.transfer(flowFile, REL_INVALID);
        }
    } else {
        if (valid.get()) {
            logger.debug("Successfully validated {} against schema; routing to 'valid'", new Object[] { validFF.get() });
            session.getProvenanceReporter().route(validFF.get(), REL_VALID, "All " + totalCount.get() + " line(s) are valid");
            session.putAttribute(validFF.get(), "count.valid.lines", Integer.toString(totalCount.get()));
            session.putAttribute(validFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
            session.transfer(validFF.get(), REL_VALID);
            session.remove(invalidFF.get());
            session.remove(flowFile);
        } else if (okCount.get() != 0) {
            // because of the finally within the 'while' loop
            totalCount.set(totalCount.get() - 1);
            logger.debug("Successfully validated {}/{} line(s) in {} against schema; routing valid lines to 'valid' and invalid lines to 'invalid'", new Object[] { okCount.get(), totalCount.get(), flowFile });
            session.getProvenanceReporter().route(validFF.get(), REL_VALID, okCount.get() + " valid line(s)");
            session.putAttribute(validFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
            session.putAttribute(validFF.get(), "count.valid.lines", Integer.toString(okCount.get()));
            session.transfer(validFF.get(), REL_VALID);
            session.getProvenanceReporter().route(invalidFF.get(), REL_INVALID, (totalCount.get() - okCount.get()) + " invalid line(s)");
            session.putAttribute(invalidFF.get(), "count.invalid.lines", Integer.toString((totalCount.get() - okCount.get())));
            session.putAttribute(invalidFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
            session.transfer(invalidFF.get(), REL_INVALID);
            session.remove(flowFile);
        } else {
            logger.debug("All lines in {} are invalid; routing to 'invalid'", new Object[] { invalidFF.get() });
            session.getProvenanceReporter().route(invalidFF.get(), REL_INVALID, "All " + totalCount.get() + " line(s) are invalid");
            session.putAttribute(invalidFF.get(), "count.invalid.lines", Integer.toString(totalCount.get()));
            session.putAttribute(invalidFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
            session.transfer(invalidFF.get(), REL_INVALID);
            session.remove(validFF.get());
            session.remove(flowFile);
        }
    }
}
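Because OutputStreamCallback exposes a single process(OutputStream) method, the anonymous classes above can be written as lambdas when targeting Java 8 or later. A minimal sketch of the same append pattern, with bytes standing in for whatever print(...) returns in the real processor:

// Minimal sketch, not the processor's actual code: append bytes to the 'valid' FlowFile
// using a lambda in place of the anonymous OutputStreamCallback.
final byte[] bytes = "a,b,c\n".getBytes(StandardCharsets.UTF_8); // placeholder line content
validFF.set(session.append(validFF.get(), out -> out.write(bytes)));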
Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
From the class TestSplitJson, method testSplit_pathToArrayWithNulls_nullStringRepresentation:
@Test
public void testSplit_pathToArrayWithNulls_nullStringRepresentation() throws Exception {
    final TestRunner testRunner = TestRunners.newTestRunner(new SplitJson());
    testRunner.setProperty(SplitJson.ARRAY_JSON_PATH_EXPRESSION, "$.arrayOfNulls");
    testRunner.setProperty(SplitJson.NULL_VALUE_DEFAULT_REPRESENTATION, AbstractJsonPathProcessor.NULL_STRING_OPTION);
    ProcessSession session = testRunner.getProcessSessionFactory().createSession();
    FlowFile ff = session.create();
    ff = session.write(ff, new OutputStreamCallback() {
        @Override
        public void process(OutputStream out) throws IOException {
            try (OutputStream outputStream = new BufferedOutputStream(out)) {
                outputStream.write("{\"stringField\": \"String Value\", \"arrayOfNulls\": [null, null, null]}".getBytes(StandardCharsets.UTF_8));
            }
        }
    });
    testRunner.enqueue(ff);
    testRunner.run();
    /* assert that three files were transferred to split and each has the word null in it */
    int expectedFiles = 3;
    testRunner.assertTransferCount(SplitJson.REL_SPLIT, expectedFiles);
    for (int i = 0; i < expectedFiles; i++) {
        testRunner.getFlowFilesForRelationship(SplitJson.REL_SPLIT).get(i).assertContentEquals("null");
    }
}
Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
From the class TestSplitJson, method testSplit_pathToArrayWithNulls_emptyStringRepresentation:
@Test
public void testSplit_pathToArrayWithNulls_emptyStringRepresentation() throws Exception {
    final TestRunner testRunner = TestRunners.newTestRunner(new SplitJson());
    testRunner.setProperty(SplitJson.ARRAY_JSON_PATH_EXPRESSION, "$.arrayOfNulls");
    ProcessSession session = testRunner.getProcessSessionFactory().createSession();
    FlowFile ff = session.create();
    ff = session.write(ff, new OutputStreamCallback() {
        @Override
        public void process(OutputStream out) throws IOException {
            try (OutputStream outputStream = new BufferedOutputStream(out)) {
                outputStream.write("{\"stringField\": \"String Value\", \"arrayOfNulls\": [null, null, null]}".getBytes(StandardCharsets.UTF_8));
            }
        }
    });
    testRunner.enqueue(ff);
    testRunner.run();
    /* assert that three files were transferred to split and each is empty */
    int expectedFiles = 3;
    testRunner.assertTransferCount(SplitJson.REL_SPLIT, expectedFiles);
    for (int i = 0; i < expectedFiles; i++) {
        testRunner.getFlowFilesForRelationship(SplitJson.REL_SPLIT).get(i).assertContentEquals("");
    }
}
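In both tests above, the hand-built ProcessSession and OutputStreamCallback only serve to give the enqueued FlowFile its JSON content. For a fixed payload like this, TestRunner can enqueue the bytes directly; a minimal sketch of the equivalent setup:

// Sketch only: same test input without creating a ProcessSession by hand.
// TestRunner.enqueue(byte[]) wraps the bytes in a FlowFile internally.
testRunner.enqueue("{\"stringField\": \"String Value\", \"arrayOfNulls\": [null, null, null]}".getBytes(StandardCharsets.UTF_8));
testRunner.run();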
Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
From the class WriteResourceToStream, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    try {
        flowFile = session.write(flowFile, new OutputStreamCallback() {
            @Override
            public void process(OutputStream out) throws IOException {
                IOUtils.write(resourceData, out);
            }
        });
        session.transfer(flowFile, REL_SUCCESS);
    } catch (ProcessException ex) {
        getLogger().error("Unable to process", ex);
        session.transfer(flowFile, REL_FAILURE);
    }
}
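The two-argument IOUtils.write call here relies on the JVM default charset. If the encoding matters, commons-io also provides a charset-explicit overload; a hedged sketch of the same write with the encoding pinned to UTF-8, assuming resourceData is a String as the overload used above implies:

// Sketch only: pin the output encoding instead of relying on the platform default.
// Assumes resourceData is a String and StandardCharsets is imported.
flowFile = session.write(flowFile, out -> IOUtils.write(resourceData, out, StandardCharsets.UTF_8));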
Use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
From the class GetHDFSEvents, method onTrigger:
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final StateManager stateManager = context.getStateManager();
    try {
        StateMap state = stateManager.getState(Scope.CLUSTER);
        String txIdAsString = state.get(LAST_TX_ID);
        if (txIdAsString != null && !"".equals(txIdAsString)) {
            lastTxId = Long.parseLong(txIdAsString);
        }
    } catch (IOException e) {
        getLogger().error("Unable to retrieve last transaction ID. Must retrieve last processed transaction ID before processing can occur.", e);
        context.yield();
        return;
    }
    try {
        final int retries = context.getProperty(NUMBER_OF_RETRIES_FOR_POLL).asInteger();
        final TimeUnit pollDurationTimeUnit = TimeUnit.MICROSECONDS;
        final long pollDuration = context.getProperty(POLL_DURATION).asTimePeriod(pollDurationTimeUnit);
        final DFSInotifyEventInputStream eventStream = lastTxId == -1L ? getHdfsAdmin().getInotifyEventStream() : getHdfsAdmin().getInotifyEventStream(lastTxId);
        final EventBatch eventBatch = getEventBatch(eventStream, pollDuration, pollDurationTimeUnit, retries);
        if (eventBatch != null && eventBatch.getEvents() != null) {
            if (eventBatch.getEvents().length > 0) {
                List<FlowFile> flowFiles = new ArrayList<>(eventBatch.getEvents().length);
                for (Event e : eventBatch.getEvents()) {
                    if (toProcessEvent(context, e)) {
                        getLogger().debug("Creating flow file for event: {}.", new Object[] { e });
                        final String path = getPath(e);
                        FlowFile flowFile = session.create();
                        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_TYPE, e.getEventType().name());
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_PATH, path);
                        flowFile = session.write(flowFile, new OutputStreamCallback() {
                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(OBJECT_MAPPER.writeValueAsBytes(e));
                            }
                        });
                        flowFiles.add(flowFile);
                    }
                }
                for (FlowFile flowFile : flowFiles) {
                    final String path = flowFile.getAttribute(EventAttributes.EVENT_PATH);
                    final String transitUri = path.startsWith("/") ? "hdfs:/" + path : "hdfs://" + path;
                    getLogger().debug("Transferring flow file {} and creating provenance event with URI {}.", new Object[] { flowFile, transitUri });
                    session.transfer(flowFile, REL_SUCCESS);
                    session.getProvenanceReporter().receive(flowFile, transitUri);
                }
            }
            lastTxId = eventBatch.getTxid();
        }
    } catch (IOException | InterruptedException e) {
        getLogger().error("Unable to get notification information: {}", new Object[] { e });
        context.yield();
        return;
    } catch (MissingEventsException e) {
        // set lastTxId to -1 and update state. This may cause events not to be processed. The reason this exception is thrown is described in the
        // org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream API. It suggests tuning a couple of parameters if this API is used.
        lastTxId = -1L;
        getLogger().error("Unable to get notification information. Setting transaction id to -1. This may cause some events to get missed. " + "Please see javadoc for org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream: {}", new Object[] { e });
    }
    updateClusterStateForTxId(stateManager);
}
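The method ends with updateClusterStateForTxId(stateManager), whose body is not part of this excerpt. As an illustration only (an assumption, not the project's actual implementation), persisting lastTxId through the StateManager could look roughly like this, using java.util.Map and java.util.HashMap:

// Hypothetical sketch, not the actual NiFi implementation: store the last transaction id
// in cluster-scoped state so the next onTrigger invocation can resume from it.
private void updateClusterStateForTxId(final StateManager stateManager) {
    try {
        final Map<String, String> newState = new HashMap<>(stateManager.getState(Scope.CLUSTER).toMap());
        newState.put(LAST_TX_ID, String.valueOf(lastTxId));
        stateManager.setState(newState, Scope.CLUSTER);
    } catch (IOException e) {
        getLogger().warn("Failed to update cluster state with last transaction id {}.", new Object[] { lastTxId }, e);
    }
}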