Use of org.apache.nifi.processor.io.StreamCallback in project nifi by apache: the CompressContent class, onTrigger method.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final ComponentLog logger = getLogger();
final long sizeBeforeCompression = flowFile.getSize();
final String compressionMode = context.getProperty(MODE).getValue();
String compressionFormatValue = context.getProperty(COMPRESSION_FORMAT).getValue();
if (compressionFormatValue.equals(COMPRESSION_FORMAT_ATTRIBUTE)) {
final String mimeType = flowFile.getAttribute(CoreAttributes.MIME_TYPE.key());
if (mimeType == null) {
logger.error("No {} attribute exists for {}; routing to failure", new Object[] { CoreAttributes.MIME_TYPE.key(), flowFile });
session.transfer(flowFile, REL_FAILURE);
return;
}
compressionFormatValue = compressionFormatMimeTypeMap.get(mimeType);
if (compressionFormatValue == null) {
logger.info("Mime Type of {} is '{}', which does not indicate a supported Compression Format; routing to success without decompressing", new Object[] { flowFile, mimeType });
session.transfer(flowFile, REL_SUCCESS);
return;
}
}
final String compressionFormat = compressionFormatValue;
final AtomicReference<String> mimeTypeRef = new AtomicReference<>(null);
final StopWatch stopWatch = new StopWatch(true);
final String fileExtension;
switch(compressionFormat.toLowerCase()) {
case COMPRESSION_FORMAT_GZIP:
fileExtension = ".gz";
break;
case COMPRESSION_FORMAT_LZMA:
fileExtension = ".lzma";
break;
case COMPRESSION_FORMAT_XZ_LZMA2:
fileExtension = ".xz";
break;
case COMPRESSION_FORMAT_BZIP2:
fileExtension = ".bz2";
break;
case COMPRESSION_FORMAT_SNAPPY:
fileExtension = ".snappy";
break;
case COMPRESSION_FORMAT_SNAPPY_FRAMED:
fileExtension = ".sz";
break;
default:
fileExtension = "";
break;
}
try {
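// Rewrite the content in one streaming pass: wrap the raw streams in the selected codec and copy between them.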
flowFile = session.write(flowFile, new StreamCallback() {
@Override
public void process(final InputStream rawIn, final OutputStream rawOut) throws IOException {
final OutputStream compressionOut;
final InputStream compressionIn;
final OutputStream bufferedOut = new BufferedOutputStream(rawOut, 65536);
final InputStream bufferedIn = new BufferedInputStream(rawIn, 65536);
try {
if (MODE_COMPRESS.equalsIgnoreCase(compressionMode)) {
compressionIn = bufferedIn;
switch(compressionFormat.toLowerCase()) {
case COMPRESSION_FORMAT_GZIP:
final int compressionLevel = context.getProperty(COMPRESSION_LEVEL).asInteger();
compressionOut = new GZIPOutputStream(bufferedOut, compressionLevel);
mimeTypeRef.set("application/gzip");
break;
case COMPRESSION_FORMAT_LZMA:
compressionOut = new LzmaOutputStream.Builder(bufferedOut).build();
mimeTypeRef.set("application/x-lzma");
break;
case COMPRESSION_FORMAT_XZ_LZMA2:
compressionOut = new XZOutputStream(bufferedOut, new LZMA2Options());
mimeTypeRef.set("application/x-xz");
break;
case COMPRESSION_FORMAT_SNAPPY:
compressionOut = new SnappyOutputStream(bufferedOut);
mimeTypeRef.set("application/x-snappy");
break;
case COMPRESSION_FORMAT_SNAPPY_FRAMED:
compressionOut = new SnappyFramedOutputStream(bufferedOut);
mimeTypeRef.set("application/x-snappy-framed");
break;
case COMPRESSION_FORMAT_BZIP2:
default:
mimeTypeRef.set("application/x-bzip2");
compressionOut = new CompressorStreamFactory().createCompressorOutputStream(compressionFormat.toLowerCase(), bufferedOut);
break;
}
} else {
compressionOut = bufferedOut;
switch(compressionFormat.toLowerCase()) {
case COMPRESSION_FORMAT_LZMA:
compressionIn = new LzmaInputStream(bufferedIn, new Decoder());
break;
case COMPRESSION_FORMAT_XZ_LZMA2:
compressionIn = new XZInputStream(bufferedIn);
break;
case COMPRESSION_FORMAT_BZIP2:
// need this two-arg constructor to support concatenated streams
compressionIn = new BZip2CompressorInputStream(bufferedIn, true);
break;
case COMPRESSION_FORMAT_GZIP:
compressionIn = new GzipCompressorInputStream(bufferedIn, true);
break;
case COMPRESSION_FORMAT_SNAPPY:
compressionIn = new SnappyInputStream(bufferedIn);
break;
case COMPRESSION_FORMAT_SNAPPY_FRAMED:
compressionIn = new SnappyFramedInputStream(bufferedIn);
break;
default:
compressionIn = new CompressorStreamFactory().createCompressorInputStream(compressionFormat.toLowerCase(), bufferedIn);
}
}
} catch (final Exception e) {
closeQuietly(bufferedOut);
throw new IOException(e);
}
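// Copy the content through the codec streams in 8 KB chunks.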
try (final InputStream in = compressionIn;
final OutputStream out = compressionOut) {
final byte[] buffer = new byte[8192];
int len;
while ((len = in.read(buffer)) > 0) {
out.write(buffer, 0, len);
}
out.flush();
}
}
});
stopWatch.stop();
final long sizeAfterCompression = flowFile.getSize();
if (MODE_DECOMPRESS.equalsIgnoreCase(compressionMode)) {
flowFile = session.removeAttribute(flowFile, CoreAttributes.MIME_TYPE.key());
if (context.getProperty(UPDATE_FILENAME).asBoolean()) {
final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
if (filename.toLowerCase().endsWith(fileExtension)) {
flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), filename.substring(0, filename.length() - fileExtension.length()));
}
}
} else {
flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), mimeTypeRef.get());
if (context.getProperty(UPDATE_FILENAME).asBoolean()) {
final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), filename + fileExtension);
}
}
logger.info("Successfully {}ed {} using {} compression format; size changed from {} to {} bytes", new Object[] { compressionMode.toLowerCase(), flowFile, compressionFormat, sizeBeforeCompression, sizeAfterCompression });
session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getDuration(TimeUnit.MILLISECONDS));
session.transfer(flowFile, REL_SUCCESS);
} catch (final ProcessException e) {
logger.error("Unable to {} {} using {} compression format due to {}; routing to failure", new Object[] { compressionMode.toLowerCase(), flowFile, compressionFormat, e });
session.transfer(flowFile, REL_FAILURE);
}
}
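For reference, a minimal sketch of exercising this StreamCallback end to end through NiFi's mock test framework (assuming nifi-mock, JUnit 4, and the standard processors bundle are on the classpath, and that the property descriptors and relationship referenced in the snippet above are publicly accessible; the test class name and input values are made up):

import static java.nio.charset.StandardCharsets.UTF_8;

import java.util.Collections;

import org.apache.nifi.processors.standard.CompressContent;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Test;

public class CompressContentSketchTest {

    @Test
    public void gzipThroughStreamCallback() {
        // Configure the processor to gzip incoming content and rename the file.
        final TestRunner runner = TestRunners.newTestRunner(new CompressContent());
        runner.setProperty(CompressContent.MODE, "compress");
        runner.setProperty(CompressContent.COMPRESSION_FORMAT, "gzip");
        runner.setProperty(CompressContent.UPDATE_FILENAME, "true");

        // Enqueue one FlowFile and invoke onTrigger() once.
        runner.enqueue("hello nifi".getBytes(UTF_8), Collections.singletonMap("filename", "hello.txt"));
        runner.run();

        // The StreamCallback rewrote the content; the processor then set mime.type
        // to application/gzip and appended ".gz" to the filename, as in the code above.
        runner.assertAllFlowFilesTransferred(CompressContent.REL_SUCCESS, 1);
        final MockFlowFile out = runner.getFlowFilesForRelationship(CompressContent.REL_SUCCESS).get(0);
        out.assertAttributeEquals("mime.type", "application/gzip");
        out.assertAttributeEquals("filename", "hello.txt.gz");
    }
}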
Use of org.apache.nifi.processor.io.StreamCallback in project nifi by apache: the ConvertCharacterSet class, onTrigger method.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final ComponentLog logger = getLogger();
final Charset inputCharset = Charset.forName(context.getProperty(INPUT_CHARSET).evaluateAttributeExpressions(flowFile).getValue());
final Charset outputCharset = Charset.forName(context.getProperty(OUTPUT_CHARSET).evaluateAttributeExpressions(flowFile).getValue());
final CharBuffer charBuffer = CharBuffer.allocate(MAX_BUFFER_SIZE);
final CharsetDecoder decoder = inputCharset.newDecoder();
decoder.onMalformedInput(CodingErrorAction.REPLACE);
decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
decoder.replaceWith("?");
final CharsetEncoder encoder = outputCharset.newEncoder();
encoder.onMalformedInput(CodingErrorAction.REPLACE);
encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
encoder.replaceWith("?".getBytes(outputCharset));
try {
final StopWatch stopWatch = new StopWatch(true);
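// Re-encode the content in a single streaming pass using the configured decoder and encoder.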
flowFile = session.write(flowFile, new StreamCallback() {
@Override
public void process(final InputStream rawIn, final OutputStream rawOut) throws IOException {
try (final BufferedReader reader = new BufferedReader(new InputStreamReader(rawIn, decoder), MAX_BUFFER_SIZE);
final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(rawOut, encoder), MAX_BUFFER_SIZE)) {
int charsRead;
while ((charsRead = reader.read(charBuffer)) != -1) {
charBuffer.flip();
writer.write(charBuffer.array(), 0, charsRead);
}
writer.flush();
}
}
});
session.getProvenanceReporter().modifyContent(flowFile, stopWatch.getElapsed(TimeUnit.MILLISECONDS));
logger.info("successfully converted characters from {} to {} for {}", new Object[] { inputCharset, outputCharset, flowFile });
session.transfer(flowFile, REL_SUCCESS);
} catch (final Exception e) {
throw new ProcessException(e);
}
}
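The decoder and encoder configuration above is what keeps the conversion from failing on bad input: malformed or unmappable characters are replaced with '?'. A minimal standalone sketch of the same streaming re-encoding outside NiFi, using only the JDK (class and method names are illustrative):

import java.io.*;
import java.nio.charset.*;

public class CharsetConvertSketch {

    // Mirrors the StreamCallback body above: stream-convert bytes from one charset
    // to another, replacing malformed or unmappable characters with '?'.
    static void convert(InputStream rawIn, OutputStream rawOut,
                        Charset inputCharset, Charset outputCharset) throws IOException {
        final CharsetDecoder decoder = inputCharset.newDecoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE)
                .replaceWith("?");
        final CharsetEncoder encoder = outputCharset.newEncoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE)
                .replaceWith("?".getBytes(outputCharset));
        try (Reader reader = new BufferedReader(new InputStreamReader(rawIn, decoder));
             Writer writer = new BufferedWriter(new OutputStreamWriter(rawOut, encoder))) {
            final char[] buffer = new char[8192];
            int charsRead;
            while ((charsRead = reader.read(buffer)) != -1) {
                writer.write(buffer, 0, charsRead);
            }
            writer.flush();
        }
    }

    public static void main(String[] args) throws IOException {
        byte[] latin1 = "café".getBytes(StandardCharsets.ISO_8859_1);
        ByteArrayOutputStream utf8 = new ByteArrayOutputStream();
        convert(new ByteArrayInputStream(latin1), utf8,
                StandardCharsets.ISO_8859_1, StandardCharsets.UTF_8);
        System.out.println(utf8.toString("UTF-8")); // prints "café"
    }
}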
Use of org.apache.nifi.processor.io.StreamCallback in project nifi by apache: the GetHTMLElement class, onTrigger method.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
final FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final Document doc;
final Elements eles;
try {
doc = parseHTMLDocumentFromFlowfile(flowFile, context, session);
eles = doc.select(context.getProperty(CSS_SELECTOR).evaluateAttributeExpressions(flowFile).getValue());
} catch (final Exception ex) {
getLogger().error("Failed to extract HTML from {} due to {}; routing to {}", new Object[] { flowFile, ex, REL_INVALID_HTML }, ex);
session.transfer(flowFile, REL_INVALID_HTML);
return;
}
final String prependValue = context.getProperty(PREPEND_ELEMENT_VALUE).evaluateAttributeExpressions(flowFile).getValue();
final String appendValue = context.getProperty(APPEND_ELEMENT_VALUE).evaluateAttributeExpressions(flowFile).getValue();
final String outputType = context.getProperty(OUTPUT_TYPE).getValue();
final String attributeKey = context.getProperty(ATTRIBUTE_KEY).evaluateAttributeExpressions(flowFile).getValue();
if (eles == null || eles.isEmpty()) {
// No element found
session.transfer(flowFile, REL_NOT_FOUND);
} else {
// Create a new FlowFile for each matching element.
for (final Element ele : eles) {
final String extractedElementValue = extractElementValue(prependValue, outputType, appendValue, ele, attributeKey);
final FlowFile ff = session.create(flowFile);
FlowFile updatedFF = ff;
switch(context.getProperty(DESTINATION).getValue()) {
case DESTINATION_ATTRIBUTE:
updatedFF = session.putAttribute(ff, HTML_ELEMENT_ATTRIBUTE_NAME, extractedElementValue);
break;
case DESTINATION_CONTENT:
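// Replace the new FlowFile's content with the extracted element value.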
updatedFF = session.write(ff, new StreamCallback() {
@Override
public void process(final InputStream inputStream, final OutputStream outputStream) throws IOException {
outputStream.write(extractedElementValue.getBytes(StandardCharsets.UTF_8));
}
});
break;
}
session.transfer(updatedFF, REL_SUCCESS);
}
// Transfer the original HTML
session.transfer(flowFile, REL_ORIGINAL);
}
}
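The extraction itself is plain jsoup: parse the document, apply the configured CSS selector, then take the element's text, inner HTML, or a named attribute depending on the output type. A small sketch, assuming jsoup is on the classpath (the HTML string and selector are invented for illustration):

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class JsoupSelectSketch {
    public static void main(String[] args) {
        String html = "<html><body><a href='https://nifi.apache.org'>NiFi</a></body></html>";
        Document doc = Jsoup.parse(html);
        Elements eles = doc.select("a");          // CSS selector, as with CSS_SELECTOR above
        for (Element ele : eles) {
            System.out.println(ele.text());       // element text      -> "NiFi"
            System.out.println(ele.html());       // inner HTML        -> "NiFi"
            System.out.println(ele.attr("href")); // named attribute   -> the link target
        }
    }
}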
Use of org.apache.nifi.processor.io.StreamCallback in project nifi by apache: the ModifyHTMLElement class, onTrigger method.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
final FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final Document doc;
final Elements eles;
try {
doc = parseHTMLDocumentFromFlowfile(flowFile, context, session);
eles = doc.select(context.getProperty(CSS_SELECTOR).evaluateAttributeExpressions(flowFile).getValue());
} catch (Exception ex) {
getLogger().error("Failed to extract HTML from {} due to {}; routing to {}", new Object[] { flowFile, ex.toString(), REL_INVALID_HTML.getName() }, ex);
session.transfer(flowFile, REL_INVALID_HTML);
return;
}
final String modifiedValue = context.getProperty(MODIFIED_VALUE).evaluateAttributeExpressions(flowFile).getValue();
if (eles == null || eles.size() == 0) {
// No element found
session.transfer(flowFile, REL_NOT_FOUND);
} else {
for (Element ele : eles) {
switch(context.getProperty(OUTPUT_TYPE).getValue()) {
case ELEMENT_HTML:
ele.html(modifiedValue);
break;
case ELEMENT_ATTRIBUTE:
ele.attr(context.getProperty(ATTRIBUTE_KEY).evaluateAttributeExpressions(flowFile).getValue(), modifiedValue);
break;
case ELEMENT_TEXT:
ele.text(modifiedValue);
break;
}
}
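// Write the modified document into a new FlowFile and record how many elements were modified.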
FlowFile ff = session.write(session.create(flowFile), new StreamCallback() {
@Override
public void process(InputStream in, OutputStream out) throws IOException {
out.write(doc.html().getBytes(StandardCharsets.UTF_8));
}
});
ff = session.putAttribute(ff, NUM_ELEMENTS_MODIFIED_ATTR, String.valueOf(eles.size()));
session.transfer(ff, REL_SUCCESS);
// Transfer the original HTML
session.transfer(flowFile, REL_ORIGINAL);
}
}
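Note that the StreamCallback here never reads its InputStream; the new content comes entirely from the in-memory jsoup Document. For write-only rewrites like this, NiFi also offers org.apache.nifi.processor.io.OutputStreamCallback, which skips the unused input stream. A sketch of an equivalent helper (the helper class and method are hypothetical):

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.io.OutputStreamCallback;

class ReplaceContentSketch {
    // Hypothetical helper: replace a FlowFile's content with the given string.
    // Because nothing is read from the existing content, OutputStreamCallback is
    // sufficient; StreamCallback (as above) also works but opens an unused InputStream.
    static FlowFile replaceContent(final ProcessSession session, final FlowFile ff, final String newContent) {
        return session.write(ff, new OutputStreamCallback() {
            @Override
            public void process(final OutputStream out) throws IOException {
                out.write(newContent.getBytes(StandardCharsets.UTF_8));
            }
        });
    }
}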
Use of org.apache.nifi.processor.io.StreamCallback in project nifi by apache: the ConvertAvroSchema class, onTrigger method.
@Override
public void onTrigger(ProcessContext context, final ProcessSession session) throws ProcessException {
FlowFile incomingAvro = session.get();
if (incomingAvro == null) {
return;
}
String inputSchemaProperty = context.getProperty(INPUT_SCHEMA).evaluateAttributeExpressions(incomingAvro).getValue();
final Schema inputSchema;
try {
inputSchema = getSchema(inputSchemaProperty, DefaultConfiguration.get());
} catch (SchemaNotFoundException e) {
getLogger().error("Cannot find schema: " + inputSchemaProperty);
session.transfer(incomingAvro, FAILURE);
return;
}
String outputSchemaProperty = context.getProperty(OUTPUT_SCHEMA).evaluateAttributeExpressions(incomingAvro).getValue();
final Schema outputSchema;
try {
outputSchema = getSchema(outputSchemaProperty, DefaultConfiguration.get());
} catch (SchemaNotFoundException e) {
getLogger().error("Cannot find schema: " + outputSchemaProperty);
session.transfer(incomingAvro, FAILURE);
return;
}
final Map<String, String> fieldMapping = new HashMap<>();
for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
if (entry.getKey().isDynamic()) {
fieldMapping.put(entry.getKey().getName(), entry.getValue());
}
}
// Set locale
final String localeProperty = context.getProperty(LOCALE).getValue();
final Locale locale = localeProperty.equals(DEFAULT_LOCALE_VALUE) ? Locale.getDefault() : LocaleUtils.toLocale(localeProperty);
final AvroRecordConverter converter = new AvroRecordConverter(inputSchema, outputSchema, fieldMapping, locale);
final DataFileWriter<Record> writer = new DataFileWriter<>(AvroUtil.newDatumWriter(outputSchema, Record.class));
writer.setCodec(getCodecFactory(context.getProperty(COMPRESSION_TYPE).getValue()));
final DataFileWriter<Record> failureWriter = new DataFileWriter<>(AvroUtil.newDatumWriter(outputSchema, Record.class));
failureWriter.setCodec(getCodecFactory(context.getProperty(COMPRESSION_TYPE).getValue()));
try {
final AtomicLong written = new AtomicLong(0L);
final FailureTracker failures = new FailureTracker();
final List<Record> badRecords = Lists.newLinkedList();
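// Clone the incoming FlowFile so that records which fail conversion can be written to a separate failure FlowFile.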
FlowFile incomingAvroCopy = session.clone(incomingAvro);
FlowFile outgoingAvro = session.write(incomingAvro, new StreamCallback() {
@Override
public void process(InputStream in, OutputStream out) throws IOException {
try (DataFileStream<Record> stream = new DataFileStream<Record>(in, new GenericDatumReader<Record>(converter.getInputSchema()))) {
try (DataFileWriter<Record> w = writer.create(outputSchema, out)) {
for (Record record : stream) {
try {
Record converted = converter.convert(record);
w.append(converted);
written.incrementAndGet();
} catch (AvroConversionException e) {
failures.add(e);
getLogger().error("Error converting data: " + e.getMessage());
badRecords.add(record);
}
}
}
}
}
});
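// Write any records that failed conversion, using the input schema, to the cloned FlowFile.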
FlowFile badOutput = session.write(incomingAvroCopy, new StreamCallback() {
@Override
public void process(InputStream in, OutputStream out) throws IOException {
try (DataFileWriter<Record> w = failureWriter.create(inputSchema, out)) {
for (Record record : badRecords) {
w.append(record);
}
}
}
});
long errors = failures.count();
// update only if file transfer is successful
session.adjustCounter("Converted records", written.get(), false);
// update only if file transfer is successful
session.adjustCounter("Conversion errors", errors, false);
if (written.get() > 0L) {
session.transfer(outgoingAvro, SUCCESS);
} else {
session.remove(outgoingAvro);
if (errors == 0L) {
badOutput = session.putAttribute(badOutput, "errors", "No incoming records");
session.transfer(badOutput, FAILURE);
}
}
if (errors > 0L) {
getLogger().warn("Failed to convert {}/{} records between Avro Schemas", new Object[] { errors, errors + written.get() });
badOutput = session.putAttribute(badOutput, "errors", failures.summary());
session.transfer(badOutput, FAILURE);
} else {
session.remove(badOutput);
}
} catch (ProcessException | DatasetIOException e) {
getLogger().error("Failed reading or writing", e);
session.transfer(incomingAvro, FAILURE);
} catch (DatasetException e) {
getLogger().error("Failed to read FlowFile", e);
session.transfer(incomingAvro, FAILURE);
} finally {
try {
writer.close();
} catch (IOException e) {
getLogger().warn("Unable to close writer ressource", e);
}
try {
failureWriter.close();
} catch (IOException e) {
getLogger().warn("Unable to close writer ressource", e);
}
}
}
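At its core, the StreamCallback above runs a read-convert-append loop over an Avro container file. A minimal standalone sketch of that loop with the generic Avro API, assuming the avro library is on the classpath (the schema and record are toy examples and no field mapping is applied):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class AvroCopySketch {
    public static void main(String[] args) throws IOException {
        // A toy schema standing in for the input/output schemas resolved above.
        Schema schema = SchemaBuilder.record("User").fields()
                .requiredString("name").endRecord();

        // Write one record to an in-memory Avro container file.
        ByteArrayOutputStream container = new ByteArrayOutputStream();
        try (DataFileWriter<GenericRecord> writer =
                     new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
            writer.create(schema, container);
            GenericRecord user = new GenericData.Record(schema);
            user.put("name", "nifi");
            writer.append(user);
        }

        // Stream the records back and append them to a new container: the same
        // read-convert-append loop the StreamCallback above performs.
        ByteArrayOutputStream copy = new ByteArrayOutputStream();
        try (DataFileStream<GenericRecord> in = new DataFileStream<>(
                     new ByteArrayInputStream(container.toByteArray()),
                     new GenericDatumReader<GenericRecord>(schema));
             DataFileWriter<GenericRecord> out =
                     new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
            out.create(schema, copy);
            for (GenericRecord record : in) {
                out.append(record); // a real conversion would map the record to the output schema here
            }
        }
        System.out.println("copied " + copy.size() + " bytes");
    }
}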