Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
Class PutSQL, method getSQL.
/**
 * Determines the SQL statement that should be executed for the given FlowFile
 *
 * @param session the session that can be used to access the given FlowFile
 * @param flowFile the FlowFile whose SQL statement should be executed
 *
 * @return the SQL that is associated with the given FlowFile
 */
private String getSQL(final ProcessSession session, final FlowFile flowFile) {
    // Read the SQL from the FlowFile's content
    final byte[] buffer = new byte[(int) flowFile.getSize()];
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, buffer);
        }
    });
    // Interpret the content as a UTF-8 encoded SQL statement
    final String sql = new String(buffer, StandardCharsets.UTF_8);
    return sql;
}
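Because InputStreamCallback declares a single process(InputStream) method, the same read can be written more compactly with a lambda on Java 8+. A minimal, behavior-equivalent sketch of the read above:

// Equivalent read using a lambda in place of the anonymous InputStreamCallback.
final byte[] buffer = new byte[(int) flowFile.getSize()];
session.read(flowFile, in -> StreamUtils.fillBuffer(in, buffer));
final String sql = new String(buffer, StandardCharsets.UTF_8);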
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
Class SplitRecord, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile original = session.get();
    if (original == null) {
        return;
    }
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final int maxRecords = context.getProperty(RECORDS_PER_SPLIT).evaluateAttributeExpressions(original).asInteger();
    final List<FlowFile> splits = new ArrayList<>();
    final Map<String, String> originalAttributes = original.getAttributes();
    try {
        session.read(original, new InputStreamCallback() {
            @Override
            public void process(final InputStream in) throws IOException {
                try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
                    final RecordSchema schema = writerFactory.getSchema(originalAttributes, reader.getSchema());
                    final RecordSet recordSet = reader.createRecordSet();
                    final PushBackRecordSet pushbackSet = new PushBackRecordSet(recordSet);
                    while (pushbackSet.isAnotherRecord()) {
                        FlowFile split = session.create(original);
                        try {
                            final Map<String, String> attributes = new HashMap<>();
                            final WriteResult writeResult;
                            try (final OutputStream out = session.write(split);
                                final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out)) {
                                if (maxRecords == 1) {
                                    final Record record = pushbackSet.next();
                                    writeResult = writer.write(record);
                                } else {
                                    final RecordSet limitedSet = pushbackSet.limit(maxRecords);
                                    writeResult = writer.write(limitedSet);
                                }
                                attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
                                attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
                                attributes.putAll(writeResult.getAttributes());
                                session.adjustCounter("Records Split", writeResult.getRecordCount(), false);
                            }
                            split = session.putAllAttributes(split, attributes);
                        } finally {
                            splits.add(split);
                        }
                    }
                } catch (final SchemaNotFoundException | MalformedRecordException e) {
                    throw new ProcessException("Failed to parse incoming data", e);
                }
            }
        });
    } catch (final ProcessException pe) {
        getLogger().error("Failed to split {}", new Object[] { original, pe });
        session.remove(splits);
        session.transfer(original, REL_FAILURE);
        return;
    }
    session.transfer(original, REL_ORIGINAL);
    session.transfer(splits, REL_SPLITS);
    getLogger().info("Successfully split {} into {} FlowFiles, each containing up to {} records", new Object[] { original, splits.size(), maxRecords });
}
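The same InputStreamCallback pattern also works for a read-only pass, for example to count records without writing any splits. A minimal sketch reusing readerFactory and originalAttributes from the snippet above (the recordCount holder is illustrative, not part of SplitRecord):

// Count the records in the incoming FlowFile without producing any output.
final AtomicLong recordCount = new AtomicLong();
session.read(original, new InputStreamCallback() {
    @Override
    public void process(final InputStream in) throws IOException {
        try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
            while (reader.nextRecord() != null) {
                recordCount.incrementAndGet();
            }
        } catch (final SchemaNotFoundException | MalformedRecordException e) {
            throw new ProcessException("Failed to parse incoming data", e);
        }
    }
});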
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
Class SplitText, method onTrigger.
/**
 * Will split the incoming stream, releasing all splits as FlowFiles at once.
 */
@Override
public void onTrigger(ProcessContext context, ProcessSession processSession) throws ProcessException {
    FlowFile sourceFlowFile = processSession.get();
    if (sourceFlowFile == null) {
        return;
    }
    AtomicBoolean error = new AtomicBoolean();
    List<SplitInfo> computedSplitsInfo = new ArrayList<>();
    AtomicReference<SplitInfo> headerSplitInfoRef = new AtomicReference<>();
    processSession.read(sourceFlowFile, new InputStreamCallback() {
        @Override
        public void process(InputStream in) throws IOException {
            TextLineDemarcator demarcator = new TextLineDemarcator(in);
            SplitInfo splitInfo = null;
            long startOffset = 0;
            // Compute the fragment representing the header (if available)
            long start = System.nanoTime();
            try {
                if (SplitText.this.headerLineCount > 0) {
                    splitInfo = SplitText.this.computeHeader(demarcator, startOffset, SplitText.this.headerLineCount, null, null);
                    if ((splitInfo != null) && (splitInfo.lineCount < SplitText.this.headerLineCount)) {
                        error.set(true);
                        getLogger().error("Unable to split " + sourceFlowFile + " due to insufficient amount of header lines. Required " + SplitText.this.headerLineCount + " but was " + splitInfo.lineCount + ". Routing to failure.");
                    }
                } else if (SplitText.this.headerMarker != null) {
                    splitInfo = SplitText.this.computeHeader(demarcator, startOffset, Long.MAX_VALUE, SplitText.this.headerMarker.getBytes(StandardCharsets.UTF_8), null);
                }
                headerSplitInfoRef.set(splitInfo);
            } catch (IllegalStateException e) {
                error.set(true);
                getLogger().error(e.getMessage() + " Routing to failure.", e);
            }
            // Compute and collect fragments representing the individual splits
            if (!error.get()) {
                if (headerSplitInfoRef.get() != null) {
                    startOffset = headerSplitInfoRef.get().length;
                }
                long preAccumulatedLength = startOffset;
                while ((splitInfo = SplitText.this.nextSplit(demarcator, startOffset, SplitText.this.lineCount, splitInfo, preAccumulatedLength)) != null) {
                    computedSplitsInfo.add(splitInfo);
                    startOffset += splitInfo.length;
                }
                long stop = System.nanoTime();
                if (getLogger().isDebugEnabled()) {
                    getLogger().debug("Computed splits in " + (stop - start) + " nanoseconds.");
                }
            }
        }
    });
    if (error.get()) {
        processSession.transfer(sourceFlowFile, REL_FAILURE);
    } else {
        final String fragmentId = UUID.randomUUID().toString();
        List<FlowFile> splitFlowFiles = this.generateSplitFlowFiles(fragmentId, sourceFlowFile, headerSplitInfoRef.get(), computedSplitsInfo, processSession);
        final FlowFile originalFlowFile = FragmentAttributes.copyAttributesToOriginal(processSession, sourceFlowFile, fragmentId, splitFlowFiles.size());
        processSession.transfer(originalFlowFile, REL_ORIGINAL);
        if (!splitFlowFiles.isEmpty()) {
            processSession.transfer(splitFlowFiles, REL_SPLITS);
        }
    }
}
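Because InputStreamCallback.process cannot return a value, SplitText hands results out of the callback through holders such as AtomicBoolean and AtomicReference. A minimal sketch of the same pattern, counting lines instead of computing splits (the lineCount holder is illustrative, not part of SplitText):

// Pass a result out of the callback via an AtomicLong holder.
final AtomicLong lineCount = new AtomicLong();
processSession.read(sourceFlowFile, new InputStreamCallback() {
    @Override
    public void process(final InputStream in) throws IOException {
        try (final BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            while (reader.readLine() != null) {
                lineCount.incrementAndGet();
            }
        }
    }
});
getLogger().debug("Counted " + lineCount.get() + " lines in " + sourceFlowFile);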
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
Class IdentifyMimeType, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    final AtomicReference<String> mimeTypeRef = new AtomicReference<>(null);
    final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream stream) throws IOException {
            try (final InputStream in = new BufferedInputStream(stream)) {
                TikaInputStream tikaStream = TikaInputStream.get(in);
                Metadata metadata = new Metadata();
                if (filename != null && context.getProperty(USE_FILENAME_IN_DETECTION).asBoolean()) {
                    metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename);
                }
                // Get mime type
                MediaType mediatype = detector.detect(tikaStream, metadata);
                mimeTypeRef.set(mediatype.toString());
            }
        }
    });
    String mimeType = mimeTypeRef.get();
    String extension = "";
    try {
        MimeType mimetype = config.getMimeRepository().forName(mimeType);
        extension = mimetype.getExtension();
    } catch (MimeTypeException ex) {
        logger.warn("MIME type extension lookup failed: {}", new Object[] { ex });
    }
    // Workaround for bug in Tika - https://issues.apache.org/jira/browse/TIKA-1563
    if (mimeType != null && mimeType.equals("application/gzip") && extension.equals(".tgz")) {
        extension = ".gz";
    }
    if (mimeType == null) {
        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/octet-stream");
        flowFile = session.putAttribute(flowFile, "mime.extension", "");
        logger.info("Unable to identify MIME Type for {}; setting to application/octet-stream", new Object[] { flowFile });
    } else {
        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), mimeType);
        flowFile = session.putAttribute(flowFile, "mime.extension", extension);
        logger.info("Identified {} as having MIME Type {}", new Object[] { flowFile, mimeType });
    }
    session.getProvenanceReporter().modifyAttributes(flowFile);
    session.transfer(flowFile, REL_SUCCESS);
}
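A quick way to exercise this processor is NiFi's mock framework. A minimal sketch, assuming a FlowFile containing only the PNG magic bytes is enqueued; the expected MIME type shown is what Tika typically reports for that signature and is illustrative:

// Hypothetical unit-test style usage with the nifi-mock TestRunner.
final TestRunner runner = TestRunners.newTestRunner(new IdentifyMimeType());
runner.enqueue(new byte[] { (byte) 0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A });
runner.run();
runner.assertAllFlowFilesTransferred(IdentifyMimeType.REL_SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(IdentifyMimeType.REL_SUCCESS).get(0);
out.assertAttributeEquals(CoreAttributes.MIME_TYPE.key(), "image/png");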
Use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.
Class ParseCEF, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final CEFParser parser = new CEFParser(validator);
    final byte[] buffer = new byte[(int) flowFile.getSize()];
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, buffer);
        }
    });
    CommonEvent event;
    try {
        // parcefoneLocale defaults to en_US, so this should not fail. But we force failure in case the custom
        // validator failed to identify an invalid Locale
        final Locale parcefoneLocale = Locale.forLanguageTag(context.getProperty(DATETIME_REPRESENTATION).getValue());
        event = parser.parse(buffer, true, parcefoneLocale);
    } catch (Exception e) {
        // This should never trigger but adding in here as a fencing mechanism to
        // address possible ParCEFone bugs.
        getLogger().error("Parser returned unexpected Exception {} while processing {}; routing to failure", new Object[] { e, flowFile });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    // The parser returns null when it cannot parse an event, so we test for that here
    if (event == null) {
        getLogger().error("Failed to parse {} as a CEF message: it does not conform to the CEF standard; routing to failure", new Object[] { flowFile });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    try {
        final String destination = context.getProperty(FIELDS_DESTINATION).getValue();
        switch (destination) {
            case DESTINATION_ATTRIBUTES:
                final Map<String, String> attributes = new HashMap<>();
                // Process KVs of the Header field
                for (Map.Entry<String, Object> entry : event.getHeader().entrySet()) {
                    attributes.put("cef.header." + entry.getKey(), prettyResult(entry.getValue(), tzId));
                }
                // Process KVs composing the Extension field
                for (Map.Entry<String, Object> entry : event.getExtension(true).entrySet()) {
                    attributes.put("cef.extension." + entry.getKey(), prettyResult(entry.getValue(), tzId));
                }
                // Apply the collected header and extension KVs to the FlowFile in a single update
                flowFile = session.putAllAttributes(flowFile, attributes);
                break;
            case DESTINATION_CONTENT:
                ObjectNode results = mapper.createObjectNode();
                // Add two JSON objects containing one CEF field each
                results.set("header", mapper.valueToTree(event.getHeader()));
                results.set("extension", mapper.valueToTree(event.getExtension(true)));
                // Optionally append the raw CEF message to the resulting JSON
                if (context.getProperty(APPEND_RAW_MESSAGE_TO_JSON).asBoolean()) {
                    results.set("_raw", mapper.valueToTree(new String(buffer)));
                }
                flowFile = session.write(flowFile, new OutputStreamCallback() {
                    @Override
                    public void process(OutputStream out) throws IOException {
                        try (OutputStream outputStream = new BufferedOutputStream(out)) {
                            outputStream.write(mapper.writeValueAsBytes(results));
                        }
                    }
                });
                // Adjust the FlowFile mime.type attribute
                flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
                // Update the provenance for good measure
                session.getProvenanceReporter().modifyContent(flowFile, "Replaced content with parsed CEF fields and values");
                break;
        }
        // Whatever the parsing strategy, we are now ready to transfer to success and commit
        session.transfer(flowFile, REL_SUCCESS);
        session.commit();
    } catch (CEFHandlingException e) {
        // The FlowFile has failed parsing & validation, so route to failure and commit
        getLogger().error("Failed to parse {} as a CEF message due to {}; routing to failure", new Object[] { flowFile, e });
        // Create a provenance event recording the routing to failure
        session.getProvenanceReporter().route(flowFile, REL_FAILURE);
        session.transfer(flowFile, REL_FAILURE);
        session.commit();
        return;
    } finally {
        session.rollback();
    }
}
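Both PutSQL and ParseCEF open with the same buffer-filling callback, so the pattern is easy to factor into a helper. A minimal sketch under that assumption (the readContent helper name is hypothetical, not part of either processor):

// Hypothetical helper: read a FlowFile's entire content into a byte array.
private byte[] readContent(final ProcessSession session, final FlowFile flowFile) {
    final byte[] buffer = new byte[(int) flowFile.getSize()];
    session.read(flowFile, new InputStreamCallback() {
        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, buffer);
        }
    });
    return buffer;
}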