use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
the class TestEvaluateJsonPath method testNullInput.
@Test
public void testNullInput() throws Exception {
    final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateJsonPath());
    testRunner.setProperty(EvaluateJsonPath.RETURN_TYPE, EvaluateJsonPath.RETURN_TYPE_JSON);
    testRunner.setProperty(EvaluateJsonPath.DESTINATION, EvaluateJsonPath.DESTINATION_ATTRIBUTE);
    testRunner.setProperty("stringField", "$.stringField");
    testRunner.setProperty("missingField", "$.missingField");
    testRunner.setProperty("nullField", "$.nullField");

    ProcessSession session = testRunner.getProcessSessionFactory().createSession();
    FlowFile ff = session.create();
    ff = session.write(ff, new OutputStreamCallback() {
        @Override
        public void process(OutputStream out) throws IOException {
            try (OutputStream outputStream = new BufferedOutputStream(out)) {
                outputStream.write("{\"stringField\": \"String Value\", \"nullField\": null}".getBytes(StandardCharsets.UTF_8));
            }
        }
    });

    testRunner.enqueue(ff);
    testRunner.run();

    testRunner.assertTransferCount(EvaluateJsonPath.REL_MATCH, 1);
    FlowFile output = testRunner.getFlowFilesForRelationship(EvaluateJsonPath.REL_MATCH).get(0);

    String validFieldValue = output.getAttribute("stringField");
    assertEquals("String Value", validFieldValue);

    String missingValue = output.getAttribute("missingField");
    assertEquals("Missing Value", "", missingValue);

    String nullValue = output.getAttribute("nullField");
    assertEquals("Null Value", "", nullValue);
}
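Because the destination is DESTINATION_ATTRIBUTE, the FlowFile content is left untouched and only attributes are added. A possible follow-up assertion, shown as a sketch rather than part of the NiFi test, using the MockFlowFile returned by getFlowFilesForRelationship and its assertContentEquals helper:

// Sketch (not from the NiFi test): with DESTINATION_ATTRIBUTE the content is
// left unchanged, so the original JSON can be asserted on the matched FlowFile.
final MockFlowFile match = testRunner.getFlowFilesForRelationship(EvaluateJsonPath.REL_MATCH).get(0);
match.assertContentEquals("{\"stringField\": \"String Value\", \"nullField\": null}");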
use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
the class TestSplitJson method testSplit_pathToNullValue.
@Test
public void testSplit_pathToNullValue() throws Exception {
    final TestRunner testRunner = TestRunners.newTestRunner(new SplitJson());
    testRunner.setProperty(SplitJson.ARRAY_JSON_PATH_EXPRESSION, "$.nullField");

    ProcessSession session = testRunner.getProcessSessionFactory().createSession();
    FlowFile ff = session.create();
    ff = session.write(ff, new OutputStreamCallback() {
        @Override
        public void process(OutputStream out) throws IOException {
            try (OutputStream outputStream = new BufferedOutputStream(out)) {
                outputStream.write("{\"stringField\": \"String Value\", \"nullField\": null}".getBytes(StandardCharsets.UTF_8));
            }
        }
    });

    testRunner.enqueue(ff);
    testRunner.run();

    testRunner.assertTransferCount(SplitJson.REL_FAILURE, 1);
}
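When the test input is a fixed byte array, TestRunner can build the FlowFile itself, which avoids the explicit ProcessSession and OutputStreamCallback used above. A minimal sketch of such a variant (the test name is illustrative and not part of the NiFi code base):

@Test
public void testSplit_pathToNullValue_viaEnqueue() throws Exception {
    final TestRunner testRunner = TestRunners.newTestRunner(new SplitJson());
    testRunner.setProperty(SplitJson.ARRAY_JSON_PATH_EXPRESSION, "$.nullField");

    // TestRunner.enqueue(byte[]) wraps the bytes in a FlowFile for us.
    testRunner.enqueue("{\"stringField\": \"String Value\", \"nullField\": null}".getBytes(StandardCharsets.UTF_8));
    testRunner.run();

    testRunner.assertTransferCount(SplitJson.REL_FAILURE, 1);
}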
use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
the class GetHBase method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final String tableName = context.getProperty(TABLE_NAME).getValue();
    final String initialTimeRange = context.getProperty(INITIAL_TIMERANGE).getValue();
    final String filterExpression = context.getProperty(FILTER_EXPRESSION).getValue();
    final HBaseClientService hBaseClientService = context.getProperty(HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);

    // if the table was changed then remove any previous state
    if (previousTable != null && !tableName.equals(previousTable)) {
        try {
            context.getStateManager().clear(Scope.CLUSTER);
        } catch (final IOException ioe) {
            getLogger().warn("Failed to clear Cluster State", ioe);
        }
        previousTable = tableName;
    }

    try {
        final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
        final RowSerializer serializer = new JsonRowSerializer(charset);
        this.lastResult = getState(context.getStateManager());
        final long defaultMinTime = (initialTimeRange.equals(NONE.getValue()) ? 0L : System.currentTimeMillis());
        final long minTime = (lastResult == null ? defaultMinTime : lastResult.getTimestamp());

        final Map<String, Set<String>> cellsMatchingTimestamp = new HashMap<>();
        final AtomicReference<Long> rowsPulledHolder = new AtomicReference<>(0L);
        final AtomicReference<Long> latestTimestampHolder = new AtomicReference<>(minTime);

        hBaseClientService.scan(tableName, columns, filterExpression, minTime, new ResultHandler() {
            @Override
            public void handle(final byte[] rowKey, final ResultCell[] resultCells) {
                final String rowKeyString = new String(rowKey, StandardCharsets.UTF_8);

                // check if latest cell timestamp is equal to our cutoff.
                // if any of the cells have a timestamp later than our cutoff, then we
                // want the row. But if the cell with the latest timestamp is equal to
                // our cutoff, then we want to check if that's one of the cells that
                // we have already seen.
                long latestCellTimestamp = 0L;
                for (final ResultCell cell : resultCells) {
                    if (cell.getTimestamp() > latestCellTimestamp) {
                        latestCellTimestamp = cell.getTimestamp();
                    }
                }
                // if the row's latest cell timestamp is earlier than our minimum time,
                // we have already seen this row; skip it.
                if (latestCellTimestamp < minTime) {
                    getLogger().debug("latest cell timestamp for row {} is {}, which is earlier than the minimum time of {}", new Object[] { rowKeyString, latestCellTimestamp, minTime });
                    return;
                }
                if (latestCellTimestamp == minTime) {
                    // latest cell timestamp is equal to our minimum time. Check if all cells that have
                    // that timestamp are in our list of previously seen cells.
                    boolean allSeen = true;
                    for (final ResultCell cell : resultCells) {
                        if (cell.getTimestamp() == latestCellTimestamp) {
                            if (lastResult == null || !lastResult.contains(cell)) {
                                allSeen = false;
                                break;
                            }
                        }
                    }

                    if (allSeen) {
                        // we have already seen all of the cells for this row. We do not want to
                        // include this cell in our output.
                        getLogger().debug("all cells for row {} have already been seen", new Object[] { rowKeyString });
                        return;
                    }
                }
                // track the cells that carry the latest timestamp we've seen so far;
                // cells with earlier timestamps can be ignored for state purposes.
                if (latestCellTimestamp >= latestTimestampHolder.get()) {
                    // new timestamp, so clear all of the 'matching cells'
                    if (latestCellTimestamp > latestTimestampHolder.get()) {
                        latestTimestampHolder.set(latestCellTimestamp);
                        cellsMatchingTimestamp.clear();
                    }

                    for (final ResultCell cell : resultCells) {
                        final long ts = cell.getTimestamp();
                        if (ts == latestCellTimestamp) {
                            final byte[] rowValue = Arrays.copyOfRange(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength() + cell.getRowOffset());
                            final byte[] cellValue = Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength() + cell.getValueOffset());

                            final String rowHash = new String(rowValue, StandardCharsets.UTF_8);
                            Set<String> cellHashes = cellsMatchingTimestamp.get(rowHash);
                            if (cellHashes == null) {
                                cellHashes = new HashSet<>();
                                cellsMatchingTimestamp.put(rowHash, cellHashes);
                            }
                            cellHashes.add(new String(cellValue, StandardCharsets.UTF_8));
                        }
                    }
                }

                // write the row to a new FlowFile.
                FlowFile flowFile = session.create();
                flowFile = session.write(flowFile, new OutputStreamCallback() {
                    @Override
                    public void process(final OutputStream out) throws IOException {
                        serializer.serialize(rowKey, resultCells, out);
                    }
                });

                final Map<String, String> attributes = new HashMap<>();
                attributes.put("hbase.table", tableName);
                attributes.put("mime.type", "application/json");
                flowFile = session.putAllAttributes(flowFile, attributes);

                session.getProvenanceReporter().receive(flowFile, hBaseClientService.toTransitUri(tableName, rowKeyString));
                session.transfer(flowFile, REL_SUCCESS);
                getLogger().debug("Received {} from HBase with row key {}", new Object[] { flowFile, rowKeyString });

                // we could potentially have a huge number of rows. If we get to 500, go ahead and commit the
                // session so that we can avoid buffering tons of FlowFiles without ever sending any out.
                long rowsPulled = rowsPulledHolder.get();
                rowsPulledHolder.set(++rowsPulled);
                if (rowsPulled % getBatchSize() == 0) {
                    session.commit();
                }
            }
        });

        final ScanResult scanResults = new ScanResult(latestTimestampHolder.get(), cellsMatchingTimestamp);

        // Commit session before we replace the lastResult; if session commit fails, we want
        // to pull these records again.
        session.commit();

        if (lastResult == null || scanResults.getTimestamp() > lastResult.getTimestamp()) {
            lastResult = scanResults;
        } else if (scanResults.getTimestamp() == lastResult.getTimestamp()) {
            final Map<String, Set<String>> combinedResults = new HashMap<>(scanResults.getMatchingCells());

            // do a deep copy because the Set may be modified below.
            for (final Map.Entry<String, Set<String>> entry : scanResults.getMatchingCells().entrySet()) {
                combinedResults.put(entry.getKey(), new HashSet<>(entry.getValue()));
            }
            // combine these with the results from 'lastResult'
            for (final Map.Entry<String, Set<String>> entry : lastResult.getMatchingCells().entrySet()) {
                final Set<String> existing = combinedResults.get(entry.getKey());
                if (existing == null) {
                    combinedResults.put(entry.getKey(), new HashSet<>(entry.getValue()));
                } else {
                    existing.addAll(entry.getValue());
                }
            }

            final ScanResult scanResult = new ScanResult(scanResults.getTimestamp(), combinedResults);
            lastResult = scanResult;
        }

        // save state using the framework's state manager
        storeState(lastResult, context.getStateManager());
    } catch (final IOException e) {
        getLogger().error("Failed to receive data from HBase due to {}", e);
        session.rollback();
    } finally {
        // if we failed, we want to yield so that we don't hammer hbase. If we succeed, then we have
        // pulled all of the records, so we want to wait a bit before hitting hbase again anyway.
        context.yield();
    }
}
use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
the class GetKafka method consumeFromKafka.
private void consumeFromKafka(final ProcessContext context, final ProcessSession session, ConsumerIterator<byte[], byte[]> iterator) throws ProcessException {
    final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
    final String demarcator = context.getProperty(MESSAGE_DEMARCATOR).getValue().replace("\\n", "\n").replace("\\r", "\r").replace("\\t", "\t");
    final byte[] demarcatorBytes = demarcator.getBytes(StandardCharsets.UTF_8);
    final String topic = context.getProperty(TOPIC).evaluateAttributeExpressions().getValue();

    FlowFile flowFile = session.create();

    final Map<String, String> attributes = new HashMap<>();
    attributes.put("kafka.topic", topic);

    final long start = System.nanoTime();
    int msgCount = 0;

    try {
        for (; msgCount < batchSize && iterator.hasNext(); msgCount++) {
            final MessageAndMetadata<byte[], byte[]> mam = iterator.next();
            if (batchSize == 1) {
                final byte[] key = mam.key();
                // the kafka.key, kafka.offset, and kafka.partition attributes are only set
                // for a batch size of 1.
                if (key != null) {
                    attributes.put("kafka.key", new String(key, StandardCharsets.UTF_8));
                }
                attributes.put("kafka.offset", String.valueOf(mam.offset()));
                attributes.put("kafka.partition", String.valueOf(mam.partition()));
            }

            // add the message to the FlowFile's contents
            final boolean firstMessage = msgCount == 0;
            flowFile = session.append(flowFile, new OutputStreamCallback() {
                @Override
                public void process(final OutputStream out) throws IOException {
                    if (!firstMessage) {
                        out.write(demarcatorBytes);
                    }
                    out.write(mam.message());
                }
            });
        }
        this.releaseFlowFile(flowFile, session, attributes, start, topic, msgCount);
    } catch (ConsumerTimeoutException e) {
        /*
         * By default Kafka blocks indefinitely via stream.hasNext() if the topic
         * is empty. If the 'consumer.timeout.ms' property is set (see
         * http://kafka.apache.org/documentation.html#configuration), hasNext()
         * will fail with this exception. To this processor it simply means there
         * are no messages and the current task should exit without failure,
         * releasing the flow file if it was able to accumulate any events.
         */
        this.releaseFlowFile(flowFile, session, attributes, start, topic, msgCount);
    } catch (final Exception e) {
        this.shutdownConsumer();
        getLogger().error("Failed to receive FlowFile from Kafka due to {}", new Object[] { e });
        if (flowFile != null) {
            session.remove(flowFile);
        }
    } finally {
        // Add the iterator back to the queue
        if (iterator != null) {
            streamIterators.offer(iterator);
        }
    }
}
use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
the class AbstractFlumeProcessor method transferEvent.
protected static void transferEvent(final Event event, ProcessSession session, Relationship relationship) {
    FlowFile flowFile = session.create();
    flowFile = session.putAllAttributes(flowFile, event.getHeaders());

    flowFile = session.write(flowFile, new OutputStreamCallback() {
        @Override
        public void process(final OutputStream out) throws IOException {
            out.write(event.getBody());
        }
    });

    session.getProvenanceReporter().create(flowFile);
    session.transfer(flowFile, relationship);
}
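Since OutputStreamCallback declares only a single process(OutputStream) method, the same helper can be written more compactly with a lambda on Java 8+. A sketch of the equivalent form (assumed behaviorally identical; this shorter version is not taken from the NiFi source):

// Sketch: the anonymous OutputStreamCallback replaced by a lambda.
protected static void transferEvent(final Event event, final ProcessSession session, final Relationship relationship) {
    FlowFile flowFile = session.create();
    flowFile = session.putAllAttributes(flowFile, event.getHeaders());
    flowFile = session.write(flowFile, out -> out.write(event.getBody()));
    session.getProvenanceReporter().create(flowFile);
    session.transfer(flowFile, relationship);
}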