Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by Apache.
The class SplitContent, method onTrigger().
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    final boolean keepSequence = context.getProperty(KEEP_SEQUENCE).asBoolean();
    final boolean keepTrailingSequence;
    final boolean keepLeadingSequence;
    if (keepSequence) {
        if (context.getProperty(BYTE_SEQUENCE_LOCATION).getValue().equals(TRAILING_POSITION.getValue())) {
            keepTrailingSequence = true;
            keepLeadingSequence = false;
        } else {
            keepTrailingSequence = false;
            keepLeadingSequence = true;
        }
    } else {
        keepTrailingSequence = false;
        keepLeadingSequence = false;
    }
    final byte[] byteSequence = this.byteSequence.get();
    if (byteSequence == null) {
        // should never happen. But just in case...
        logger.error("{} Unable to obtain Byte Sequence", new Object[] { this });
        session.rollback();
        return;
    }
    final List<Tuple<Long, Long>> splits = new ArrayList<>();
    final NaiveSearchRingBuffer buffer = new NaiveSearchRingBuffer(byteSequence);
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream rawIn) throws IOException {
            long bytesRead = 0L;
            long startOffset = 0L;
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                while (true) {
                    final int nextByte = in.read();
                    if (nextByte == -1) {
                        return;
                    }
                    bytesRead++;
                    boolean matched = buffer.addAndCompare((byte) (nextByte & 0xFF));
                    if (matched) {
                        long splitLength;
                        if (keepTrailingSequence) {
                            splitLength = bytesRead - startOffset;
                        } else {
                            splitLength = bytesRead - startOffset - byteSequence.length;
                        }
                        if (keepLeadingSequence && startOffset > 0) {
                            splitLength += byteSequence.length;
                        }
                        final long splitStart = (keepLeadingSequence && startOffset > 0) ? startOffset - byteSequence.length : startOffset;
                        splits.add(new Tuple<>(splitStart, splitLength));
                        startOffset = bytesRead;
                        buffer.clear();
                    }
                }
            }
        }
    });
    long lastOffsetPlusSize = -1L;
    if (splits.isEmpty()) {
        FlowFile clone = session.clone(flowFile);
        session.transfer(flowFile, REL_ORIGINAL);
        session.transfer(clone, REL_SPLITS);
        logger.info("Found no match for {}; transferring original 'original' and transferring clone {} to 'splits'", new Object[] { flowFile, clone });
        return;
    }
    final ArrayList<FlowFile> splitList = new ArrayList<>();
    for (final Tuple<Long, Long> tuple : splits) {
        long offset = tuple.getKey();
        long size = tuple.getValue();
        if (size > 0) {
            FlowFile split = session.clone(flowFile, offset, size);
            splitList.add(split);
        }
        lastOffsetPlusSize = offset + size;
    }
    // lastOffsetPlusSize indicates the ending position of the last split.
    // if the data didn't end with the byte sequence, we need one final split to run from the end
    // of the last split to the end of the content.
    long finalSplitOffset = lastOffsetPlusSize;
    if (!keepTrailingSequence && !keepLeadingSequence) {
        finalSplitOffset += byteSequence.length;
    }
    if (finalSplitOffset > -1L && finalSplitOffset < flowFile.getSize()) {
        FlowFile finalSplit = session.clone(flowFile, finalSplitOffset, flowFile.getSize() - finalSplitOffset);
        splitList.add(finalSplit);
    }
    final String fragmentId = finishFragmentAttributes(session, flowFile, splitList);
    session.transfer(splitList, REL_SPLITS);
    flowFile = FragmentAttributes.copyAttributesToOriginal(session, flowFile, fragmentId, splitList.size());
    session.transfer(flowFile, REL_ORIGINAL);
    if (splitList.size() > 10) {
        logger.info("Split {} into {} files", new Object[] { flowFile, splitList.size() });
    } else {
        logger.info("Split {} into {} files: {}", new Object[] { flowFile, splitList.size(), splitList });
    }
}
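Note: InputStreamCallback declares a single process(InputStream) method, so on Java 8+ the same kind of read can also be written as a lambda. The following is a minimal sketch, not part of SplitContent; it assumes session and flowFile are in scope, that java.util.concurrent.atomic.AtomicLong is imported, and it uses a hypothetical delimiterCount counter with a newline as an illustrative delimiter.
final AtomicLong delimiterCount = new AtomicLong(0L);
session.read(flowFile, rawIn -> {
    try (final InputStream in = new BufferedInputStream(rawIn)) {
        int nextByte;
        while ((nextByte = in.read()) != -1) {
            if (nextByte == '\n') {
                // count occurrences of the illustrative delimiter
                delimiterCount.incrementAndGet();
            }
        }
    }
});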
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by Apache.
The class ValidateCsv, method onTrigger().
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final CsvPreference csvPref = getPreference(context, flowFile);
    final boolean header = context.getProperty(HEADER).asBoolean();
    final ComponentLog logger = getLogger();
    final String schema = context.getProperty(SCHEMA).evaluateAttributeExpressions(flowFile).getValue();
    final CellProcessor[] cellProcs = this.parseSchema(schema);
    final boolean isWholeFFValidation = context.getProperty(VALIDATION_STRATEGY).getValue().equals(VALIDATE_WHOLE_FLOWFILE.getValue());
    final AtomicReference<Boolean> valid = new AtomicReference<Boolean>(true);
    final AtomicReference<Boolean> isFirstLineValid = new AtomicReference<Boolean>(true);
    final AtomicReference<Boolean> isFirstLineInvalid = new AtomicReference<Boolean>(true);
    final AtomicReference<Integer> okCount = new AtomicReference<Integer>(0);
    final AtomicReference<Integer> totalCount = new AtomicReference<Integer>(0);
    final AtomicReference<FlowFile> invalidFF = new AtomicReference<FlowFile>(null);
    final AtomicReference<FlowFile> validFF = new AtomicReference<FlowFile>(null);
    if (!isWholeFFValidation) {
        invalidFF.set(session.create(flowFile));
        validFF.set(session.create(flowFile));
    }
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            NifiCsvListReader listReader = null;
            try {
                listReader = new NifiCsvListReader(new InputStreamReader(in), csvPref);
                // handling of header
                if (header) {
                    List<String> headerList = listReader.read();
                    if (!isWholeFFValidation) {
                        invalidFF.set(session.append(invalidFF.get(), new OutputStreamCallback() {
                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(print(headerList, csvPref, isFirstLineInvalid.get()));
                            }
                        }));
                        validFF.set(session.append(validFF.get(), new OutputStreamCallback() {
                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(print(headerList, csvPref, isFirstLineValid.get()));
                            }
                        }));
                        isFirstLineValid.set(false);
                        isFirstLineInvalid.set(false);
                    }
                }
                boolean stop = false;
                while (!stop) {
                    try {
                        final List<Object> list = listReader.read(cellProcs);
                        stop = list == null;
                        if (!isWholeFFValidation && !stop) {
                            validFF.set(session.append(validFF.get(), new OutputStreamCallback() {
                                @Override
                                public void process(OutputStream out) throws IOException {
                                    out.write(print(list, csvPref, isFirstLineValid.get()));
                                }
                            }));
                            okCount.set(okCount.get() + 1);
                            if (isFirstLineValid.get()) {
                                isFirstLineValid.set(false);
                            }
                        }
                    } catch (final SuperCsvException e) {
                        valid.set(false);
                        if (isWholeFFValidation) {
                            logger.debug("Failed to validate {} against schema due to {}; routing to 'invalid'", new Object[] { flowFile }, e);
                            break;
                        } else {
                            // we append the invalid line to the flow file that will be routed to invalid relationship
                            invalidFF.set(session.append(invalidFF.get(), new OutputStreamCallback() {
                                @Override
                                public void process(OutputStream out) throws IOException {
                                    out.write(print(e.getCsvContext().getRowSource(), csvPref, isFirstLineInvalid.get()));
                                }
                            }));
                            if (isFirstLineInvalid.get()) {
                                isFirstLineInvalid.set(false);
                            }
                        }
                    } finally {
                        if (!isWholeFFValidation) {
                            totalCount.set(totalCount.get() + 1);
                        }
                    }
                }
            } catch (final IOException e) {
                valid.set(false);
                logger.error("Failed to validate {} against schema due to {}", new Object[] { flowFile }, e);
            } finally {
                if (listReader != null) {
                    listReader.close();
                }
            }
        }
    });
    if (isWholeFFValidation) {
        if (valid.get()) {
            logger.debug("Successfully validated {} against schema; routing to 'valid'", new Object[] { flowFile });
            session.getProvenanceReporter().route(flowFile, REL_VALID);
            session.transfer(flowFile, REL_VALID);
        } else {
            session.getProvenanceReporter().route(flowFile, REL_INVALID);
            session.transfer(flowFile, REL_INVALID);
        }
    } else {
        if (valid.get()) {
            logger.debug("Successfully validated {} against schema; routing to 'valid'", new Object[] { validFF.get() });
            session.getProvenanceReporter().route(validFF.get(), REL_VALID, "All " + totalCount.get() + " line(s) are valid");
            session.putAttribute(validFF.get(), "count.valid.lines", Integer.toString(totalCount.get()));
            session.putAttribute(validFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
            session.transfer(validFF.get(), REL_VALID);
            session.remove(invalidFF.get());
            session.remove(flowFile);
        } else if (okCount.get() != 0) {
            // because of the finally within the 'while' loop
            totalCount.set(totalCount.get() - 1);
            logger.debug("Successfully validated {}/{} line(s) in {} against schema; routing valid lines to 'valid' and invalid lines to 'invalid'", new Object[] { okCount.get(), totalCount.get(), flowFile });
            session.getProvenanceReporter().route(validFF.get(), REL_VALID, okCount.get() + " valid line(s)");
            session.putAttribute(validFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
            session.putAttribute(validFF.get(), "count.valid.lines", Integer.toString(okCount.get()));
            session.transfer(validFF.get(), REL_VALID);
            session.getProvenanceReporter().route(invalidFF.get(), REL_INVALID, (totalCount.get() - okCount.get()) + " invalid line(s)");
            session.putAttribute(invalidFF.get(), "count.invalid.lines", Integer.toString((totalCount.get() - okCount.get())));
            session.putAttribute(invalidFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
            session.transfer(invalidFF.get(), REL_INVALID);
            session.remove(flowFile);
        } else {
            logger.debug("All lines in {} are invalid; routing to 'invalid'", new Object[] { invalidFF.get() });
            session.getProvenanceReporter().route(invalidFF.get(), REL_INVALID, "All " + totalCount.get() + " line(s) are invalid");
            session.putAttribute(invalidFF.get(), "count.invalid.lines", Integer.toString(totalCount.get()));
            session.putAttribute(invalidFF.get(), "count.total.lines", Integer.toString(totalCount.get()));
            session.transfer(invalidFF.get(), REL_INVALID);
            session.remove(validFF.get());
            session.remove(flowFile);
        }
    }
}
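A detail worth calling out in the snippet above: session.append(...) returns a new FlowFile reference, and locals captured by an anonymous InputStreamCallback or OutputStreamCallback must be effectively final, which is why the child FlowFiles are held in AtomicReference wrappers. Below is a minimal sketch of that holder pattern on its own; it assumes session and flowFile are in scope, that java.nio.charset.StandardCharsets is imported, and the written content is purely illustrative.
final AtomicReference<FlowFile> childRef = new AtomicReference<>(session.create(flowFile));
childRef.set(session.append(childRef.get(), new OutputStreamCallback() {
    @Override
    public void process(final OutputStream out) throws IOException {
        // write one illustrative line into the child FlowFile
        out.write("example line\n".getBytes(StandardCharsets.UTF_8));
    }
}));
session.transfer(childRef.get(), REL_VALID);
session.remove(flowFile);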
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by Apache.
The class TestJmsConsumer, method testMap2FlowFileTextMessage().
@Test
public void testMap2FlowFileTextMessage() throws Exception {
    TestRunner runner = TestRunners.newTestRunner(GetJMSQueue.class);
    TextMessage textMessage = new ActiveMQTextMessage();
    String payload = "Hello world!";
    textMessage.setText(payload);
    ProcessContext context = runner.getProcessContext();
    ProcessSession session = runner.getProcessSessionFactory().createSession();
    ProcessorInitializationContext pic = new MockProcessorInitializationContext(runner.getProcessor(), (MockProcessContext) runner.getProcessContext());
    JmsProcessingSummary summary = JmsConsumer.map2FlowFile(context, session, textMessage, true, pic.getLogger());
    assertEquals("TextMessage content length should equal to FlowFile content size", payload.length(), summary.getLastFlowFile().getSize());
    final byte[] buffer = new byte[payload.length()];
    runner.clearTransferState();
    session.read(summary.getLastFlowFile(), new InputStreamCallback() {
        @Override
        public void process(InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, buffer, false);
        }
    });
    String contentString = new String(buffer, "UTF-8");
    assertEquals("", payload, contentString);
}
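For comparison, a hedged variant of the same content check that sizes the buffer from the FlowFile itself and compares raw bytes; it assumes org.junit.Assert.assertArrayEquals and java.nio.charset.StandardCharsets in addition to the imports the test already uses.
final byte[] actual = new byte[(int) summary.getLastFlowFile().getSize()];
session.read(summary.getLastFlowFile(), new InputStreamCallback() {
    @Override
    public void process(final InputStream in) throws IOException {
        // require the buffer to be filled completely; the sizes are expected to match
        StreamUtils.fillBuffer(in, actual, true);
    }
});
assertArrayEquals(payload.getBytes(StandardCharsets.UTF_8), actual);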
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by Apache.
The class PublishAMQP, method extractMessage().
/**
 * Extracts the contents of the {@link FlowFile} as a byte array.
 */
private byte[] extractMessage(FlowFile flowFile, ProcessSession session) {
    final byte[] messageContent = new byte[(int) flowFile.getSize()];
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, messageContent, true);
        }
    });
    return messageContent;
}
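This helper pre-sizes the buffer from flowFile.getSize() and, as I read it, the true argument to fillBuffer requires the stream to supply the full buffer, so the read fails if fewer bytes are available; either way the whole content is held in memory, which suits small payloads. A rough alternative sketch that streams into a ByteArrayOutputStream instead is shown below; it assumes java.io.ByteArrayOutputStream and NiFi's StreamUtils.copy, and the method name extractMessageStreaming is hypothetical, not something PublishAMQP actually defines.
private byte[] extractMessageStreaming(final FlowFile flowFile, final ProcessSession session) {
    final ByteArrayOutputStream baos = new ByteArrayOutputStream((int) flowFile.getSize());
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            // copy everything the stream provides, without assuming an exact length
            StreamUtils.copy(in, baos);
        }
    });
    return baos.toByteArray();
}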
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by Apache.
The class ExtractAvroMetadata, method onTrigger().
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final Map<String, String> avroMetadata = new HashMap<>();
    final Set<String> requestedMetadataKeys = new HashSet<>();
    final boolean countRecords = context.getProperty(COUNT_ITEMS).asBoolean();
    final String fingerprintAlgorithm = context.getProperty(FINGERPRINT_ALGORITHM).getValue();
    final String metadataKeysValue = context.getProperty(METADATA_KEYS).getValue();
    if (!StringUtils.isEmpty(metadataKeysValue)) {
        final String[] keys = metadataKeysValue.split("\\s*,\\s*");
        for (final String key : keys) {
            requestedMetadataKeys.add(key.trim());
        }
    }
    try {
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(InputStream rawIn) throws IOException {
                try (final InputStream in = new BufferedInputStream(rawIn);
                     final DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {
                    final Schema schema = reader.getSchema();
                    if (schema == null) {
                        throw new ProcessException("Avro schema was null");
                    }
                    for (String key : reader.getMetaKeys()) {
                        if (requestedMetadataKeys.contains(key)) {
                            avroMetadata.put(key, reader.getMetaString(key));
                        }
                    }
                    try {
                        final byte[] rawFingerprint = SchemaNormalization.parsingFingerprint(fingerprintAlgorithm, schema);
                        avroMetadata.put(SCHEMA_FINGERPRINT_ATTR, Hex.encodeHexString(rawFingerprint));
                        avroMetadata.put(SCHEMA_TYPE_ATTR, schema.getType().getName());
                        avroMetadata.put(SCHEMA_NAME_ATTR, schema.getName());
                    } catch (NoSuchAlgorithmException e) {
                        // shouldn't happen since allowable values are valid algorithms
                        throw new ProcessException(e);
                    }
                    if (countRecords) {
                        long recordCount = reader.getBlockCount();
                        try {
                            while (reader.nextBlock() != null) {
                                recordCount += reader.getBlockCount();
                            }
                        } catch (NoSuchElementException e) {
                            // happens at end of file
                        }
                        avroMetadata.put(ITEM_COUNT_ATTR, String.valueOf(recordCount));
                    }
                }
            }
        });
    } catch (final ProcessException pe) {
        getLogger().error("Failed to extract Avro metadata for {} due to {}; transferring to failure", new Object[] { flowFile, pe });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    flowFile = session.putAllAttributes(flowFile, avroMetadata);
    session.transfer(flowFile, REL_SUCCESS);
}
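The shape of the snippet above is a common one with InputStreamCallback: results are collected into an effectively-final map inside the callback, and only afterwards written back as attributes with session.putAllAttributes(...). A minimal sketch of just that shape follows; it assumes session and flowFile are in scope, borrows NiFi's StreamUtils and commons-codec Hex as used elsewhere on this page, and the attribute name and four-byte read are illustrative only.
final Map<String, String> collected = new HashMap<>();
session.read(flowFile, new InputStreamCallback() {
    @Override
    public void process(final InputStream rawIn) throws IOException {
        final byte[] headerBytes = new byte[4];
        // read up to the first four bytes of the content as an example
        StreamUtils.fillBuffer(rawIn, headerBytes, false);
        collected.put("example.leading.bytes.hex", Hex.encodeHexString(headerBytes));
    }
});
flowFile = session.putAllAttributes(flowFile, collected);
session.transfer(flowFile, REL_SUCCESS);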