use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
The class YandexTranslate, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final StopWatch stopWatch = new StopWatch(true);
    final String key = context.getProperty(KEY).getValue();
    final String sourceLanguage = context.getProperty(SOURCE_LANGUAGE).evaluateAttributeExpressions(flowFile).getValue();
    final String targetLanguage = context.getProperty(TARGET_LANGUAGE).evaluateAttributeExpressions(flowFile).getValue();
    final String encoding = context.getProperty(CHARACTER_SET).evaluateAttributeExpressions(flowFile).getValue();

    final List<String> attributeNames = new ArrayList<>();
    final List<String> textValues = new ArrayList<>();
    for (final PropertyDescriptor descriptor : context.getProperties().keySet()) {
        if (descriptor.isDynamic()) {
            // add to list so that we know the order when the translations come back.
            attributeNames.add(descriptor.getName());
            textValues.add(context.getProperty(descriptor).evaluateAttributeExpressions(flowFile).getValue());
        }
    }

    if (context.getProperty(TRANSLATE_CONTENT).asBoolean()) {
        final byte[] buff = new byte[(int) flowFile.getSize()];
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(final InputStream in) throws IOException {
                StreamUtils.fillBuffer(in, buff);
            }
        });
        final String content = new String(buff, Charset.forName(encoding));
        textValues.add(content);
    }

    final Invocation invocation = prepareResource(key, textValues, sourceLanguage, targetLanguage);
    final Response response;
    try {
        response = invocation.invoke();
    } catch (final Exception e) {
        getLogger().error("Failed to make request to Yandex to translate text for {} due to {}; routing to comms.failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_COMMS_FAILURE);
        return;
    }

    if (response.getStatus() != Response.Status.OK.getStatusCode()) {
        getLogger().error("Failed to translate text using Yandex for {}; response was {}: {}; routing to {}",
            new Object[] { flowFile, response.getStatus(), response.getStatusInfo().getReasonPhrase(), REL_TRANSLATION_FAILED.getName() });
        flowFile = session.putAttribute(flowFile, "yandex.translate.failure.reason", response.getStatusInfo().getReasonPhrase());
        session.transfer(flowFile, REL_TRANSLATION_FAILED);
        return;
    }

    final Map<String, String> newAttributes = new HashMap<>();
    final Translation translation = response.readEntity(Translation.class);
    final List<String> texts = translation.getText();
    for (int i = 0; i < texts.size(); i++) {
        final String text = texts.get(i);
        if (i < attributeNames.size()) {
            final String attributeName = attributeNames.get(i);
            newAttributes.put(attributeName, text);
        } else {
            flowFile = session.write(flowFile, new OutputStreamCallback() {
                @Override
                public void process(final OutputStream out) throws IOException {
                    out.write(text.getBytes(encoding));
                }
            });
            newAttributes.put("language", targetLanguage);
        }
    }

    if (!newAttributes.isEmpty()) {
        flowFile = session.putAllAttributes(flowFile, newAttributes);
    }

    stopWatch.stop();
    session.transfer(flowFile, REL_SUCCESS);
    getLogger().info("Successfully translated {} items for {} from {} to {} in {}; routing to success",
        new Object[] { texts.size(), flowFile, sourceLanguage, targetLanguage, stopWatch.getDuration() });
}
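The example above shows the core contract of OutputStreamCallback: session.write() hands the callback an OutputStream for the FlowFile's new content and returns a new FlowFile reference that must replace the old one. A minimal sketch of that contract follows; it is an illustration rather than code from the project, and assumes the usual NiFi imports (org.apache.nifi.processor.io.OutputStreamCallback, java.io.OutputStream, java.nio.charset.StandardCharsets) plus a REL_SUCCESS relationship defined on the processor.

FlowFile flowFile = session.get();
if (flowFile == null) {
    return;
}

// Overwrite the FlowFile's content; the framework opens and closes the underlying stream.
flowFile = session.write(flowFile, new OutputStreamCallback() {
    @Override
    public void process(final OutputStream out) throws IOException {
        out.write("hello".getBytes(StandardCharsets.UTF_8));
    }
});
session.transfer(flowFile, REL_SUCCESS);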
use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
The class QueryRecord, method onTrigger:
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final FlowFile original = session.get();
    if (original == null) {
        return;
    }

    final StopWatch stopWatch = new StopWatch(true);
    final RecordSetWriterFactory recordSetWriterFactory = context.getProperty(RECORD_WRITER_FACTORY).asControllerService(RecordSetWriterFactory.class);
    final RecordReaderFactory recordReaderFactory = context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);

    final Map<FlowFile, Relationship> transformedFlowFiles = new HashMap<>();
    final Set<FlowFile> createdFlowFiles = new HashSet<>();

    // Determine the Record Reader's schema
    final RecordSchema readerSchema;
    try (final InputStream rawIn = session.read(original)) {
        final Map<String, String> originalAttributes = original.getAttributes();
        final RecordReader reader = recordReaderFactory.createRecordReader(originalAttributes, rawIn, getLogger());
        final RecordSchema inputSchema = reader.getSchema();
        readerSchema = recordSetWriterFactory.getSchema(originalAttributes, inputSchema);
    } catch (final Exception e) {
        getLogger().error("Failed to determine Record Schema from {}; routing to failure", new Object[] { original, e });
        session.transfer(original, REL_FAILURE);
        return;
    }

    // Determine the schema for writing the data
    final Map<String, String> originalAttributes = original.getAttributes();

    int recordsRead = 0;
    try {
        for (final PropertyDescriptor descriptor : context.getProperties().keySet()) {
            if (!descriptor.isDynamic()) {
                continue;
            }

            final Relationship relationship = new Relationship.Builder().name(descriptor.getName()).build();

            // We have to fork a child because we may need to read the input FlowFile more than once,
            // and we cannot call session.read() on the original FlowFile while we are within a write
            // callback for the original FlowFile.
            FlowFile transformed = session.create(original);
            boolean flowFileRemoved = false;
            try {
                final String sql = context.getProperty(descriptor).evaluateAttributeExpressions(original).getValue();
                final AtomicReference<WriteResult> writeResultRef = new AtomicReference<>();
                final QueryResult queryResult;
                if (context.getProperty(CACHE_SCHEMA).asBoolean()) {
                    queryResult = queryWithCache(session, original, sql, context, recordReaderFactory);
                } else {
                    queryResult = query(session, original, sql, context, recordReaderFactory);
                }

                final AtomicReference<String> mimeTypeRef = new AtomicReference<>();
                try {
                    final ResultSet rs = queryResult.getResultSet();
                    transformed = session.write(transformed, new OutputStreamCallback() {
                        @Override
                        public void process(final OutputStream out) throws IOException {
                            final ResultSetRecordSet recordSet;
                            final RecordSchema writeSchema;
                            try {
                                recordSet = new ResultSetRecordSet(rs, readerSchema);
                                final RecordSchema resultSetSchema = recordSet.getSchema();
                                writeSchema = recordSetWriterFactory.getSchema(originalAttributes, resultSetSchema);
                            } catch (final SQLException | SchemaNotFoundException e) {
                                throw new ProcessException(e);
                            }

                            try (final RecordSetWriter resultSetWriter = recordSetWriterFactory.createWriter(getLogger(), writeSchema, out)) {
                                writeResultRef.set(resultSetWriter.write(recordSet));
                                mimeTypeRef.set(resultSetWriter.getMimeType());
                            } catch (final Exception e) {
                                throw new IOException(e);
                            }
                        }
                    });
                } finally {
                    closeQuietly(queryResult);
                }

                recordsRead = Math.max(recordsRead, queryResult.getRecordsRead());

                final WriteResult result = writeResultRef.get();
                if (result.getRecordCount() == 0 && !context.getProperty(INCLUDE_ZERO_RECORD_FLOWFILES).asBoolean()) {
                    session.remove(transformed);
                    flowFileRemoved = true;
                    transformedFlowFiles.remove(transformed);
                    getLogger().info("Transformed {} but the result contained no data so will not pass on a FlowFile", new Object[] { original });
                } else {
                    final Map<String, String> attributesToAdd = new HashMap<>();
                    if (result.getAttributes() != null) {
                        attributesToAdd.putAll(result.getAttributes());
                    }

                    attributesToAdd.put(CoreAttributes.MIME_TYPE.key(), mimeTypeRef.get());
                    attributesToAdd.put("record.count", String.valueOf(result.getRecordCount()));
                    transformed = session.putAllAttributes(transformed, attributesToAdd);
                    transformedFlowFiles.put(transformed, relationship);

                    session.adjustCounter("Records Written", result.getRecordCount(), false);
                }
            } finally {
                // Ensure that we have the FlowFile in the set in case we throw any Exception
                if (!flowFileRemoved) {
                    createdFlowFiles.add(transformed);
                }
            }
        }

        final long elapsedMillis = stopWatch.getElapsed(TimeUnit.MILLISECONDS);
        if (transformedFlowFiles.size() > 0) {
            session.getProvenanceReporter().fork(original, transformedFlowFiles.keySet(), elapsedMillis);

            for (final Map.Entry<FlowFile, Relationship> entry : transformedFlowFiles.entrySet()) {
                final FlowFile transformed = entry.getKey();
                final Relationship relationship = entry.getValue();

                session.getProvenanceReporter().route(transformed, relationship);
                session.transfer(transformed, relationship);
            }
        }

        getLogger().info("Successfully queried {} in {} millis", new Object[] { original, elapsedMillis });
        session.transfer(original, REL_ORIGINAL);
    } catch (final SQLException e) {
        getLogger().error("Unable to query {} due to {}", new Object[] { original, e.getCause() == null ? e : e.getCause() });
        session.remove(createdFlowFiles);
        session.transfer(original, REL_FAILURE);
    } catch (final Exception e) {
        getLogger().error("Unable to query {} due to {}", new Object[] { original, e });
        session.remove(createdFlowFiles);
        session.transfer(original, REL_FAILURE);
    }

    session.adjustCounter("Records Read", recordsRead, false);
}
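Because OutputStreamCallback.process() returns void and an anonymous class can only capture effectively final locals, QueryRecord passes results out of the callback through AtomicReference holders (writeResultRef, mimeTypeRef). A stripped-down sketch of that idiom follows; buildPayload() is a hypothetical helper, not part of the processor.

final AtomicReference<Long> bytesWrittenRef = new AtomicReference<>();
flowFile = session.write(flowFile, new OutputStreamCallback() {
    @Override
    public void process(final OutputStream out) throws IOException {
        final byte[] payload = buildPayload(); // hypothetical helper producing the content
        out.write(payload);
        bytesWrittenRef.set((long) payload.length); // hand a result back to the caller
    }
});
final long bytesWritten = bytesWrittenRef.get(); // available once session.write() returns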
use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
The class RouteText, method appendLine:
private void appendLine(final ProcessSession session, final Map<Relationship, Map<Group, FlowFile>> flowFileMap, final Relationship relationship,
    final FlowFile original, final String line, final Charset charset, final Group group) {

    Map<Group, FlowFile> groupToFlowFileMap = flowFileMap.get(relationship);
    if (groupToFlowFileMap == null) {
        groupToFlowFileMap = new HashMap<>();
        flowFileMap.put(relationship, groupToFlowFileMap);
    }

    FlowFile flowFile = groupToFlowFileMap.get(group);
    if (flowFile == null) {
        flowFile = session.create(original);
    }

    flowFile = session.append(flowFile, new OutputStreamCallback() {
        @Override
        public void process(final OutputStream out) throws IOException {
            out.write(line.getBytes(charset));
        }
    });

    groupToFlowFileMap.put(group, flowFile);
}
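Note that session.append(), like session.write(), returns a new FlowFile reference, which is why appendLine stores the result back into groupToFlowFileMap on every call; holding on to the stale reference would break subsequent session operations. The append idiom in isolation, as a sketch that assumes flowFile, line, and charset are already in scope:

// Appends to the end of the existing content instead of replacing it.
flowFile = session.append(flowFile, new OutputStreamCallback() {
    @Override
    public void process(final OutputStream out) throws IOException {
        out.write(line.getBytes(charset));
    }
});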
use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
The class SplitText, method concatenateContents:
/**
 * Will concatenate the contents of the provided array of {@link FlowFile}s
 * into a single {@link FlowFile}. While this operation is as general as it
 * is described in the previous sentence, in the context of this processor
 * there can only be two {@link FlowFile}s, with the first {@link FlowFile}
 * representing the header content of the split and the second
 * {@link FlowFile} representing the split itself.
 */
private FlowFile concatenateContents(FlowFile sourceFlowFile, ProcessSession session, FlowFile... flowFiles) {
    FlowFile mergedFlowFile = session.create(sourceFlowFile);
    for (FlowFile flowFile : flowFiles) {
        mergedFlowFile = session.append(mergedFlowFile, new OutputStreamCallback() {
            @Override
            public void process(OutputStream out) throws IOException {
                try (InputStream is = session.read(flowFile)) {
                    IOUtils.copy(is, out);
                }
            }
        });
    }
    // in current usage we always have 2 files
    session.remove(flowFiles[1]);
    return mergedFlowFile;
}
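In this processor the helper is always invoked with exactly two FlowFiles, header first, which is why it can unconditionally remove flowFiles[1]. A hypothetical call site might look like the following; the names headerFlowFile and splitFlowFile are illustrative, not taken from SplitText.

// headerFlowFile carries the configured header lines; splitFlowFile carries one split's body.
FlowFile merged = concatenateContents(sourceFlowFile, session, headerFlowFile, splitFlowFile);
// The helper removes splitFlowFile (flowFiles[1]) itself, so only the merged result
// and the header FlowFile remain to be managed after this call.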
use of org.apache.nifi.processor.io.OutputStreamCallback in project nifi by apache.
The class TailFile, method processTailFile:
private void processTailFile(final ProcessContext context, final ProcessSession session, final String tailFile) {
    // If user changes the file that is being tailed, we need to consume the already-rolled-over data according
    // to the Initial Start Position property
    boolean rolloverOccurred;
    TailFileObject tfo = states.get(tailFile);

    if (tfo.isTailFileChanged()) {
        rolloverOccurred = false;
        final String recoverPosition = context.getProperty(START_POSITION).getValue();

        if (START_BEGINNING_OF_TIME.getValue().equals(recoverPosition)) {
            recoverRolledFiles(context, session, tailFile, tfo.getExpectedRecoveryChecksum(), tfo.getState().getTimestamp(), tfo.getState().getPosition());
        } else if (START_CURRENT_FILE.getValue().equals(recoverPosition)) {
            cleanup();
            tfo.setState(new TailFileState(tailFile, null, null, 0L, 0L, 0L, null, tfo.getState().getBuffer()));
        } else {
            final String filename = tailFile;
            final File file = new File(filename);

            try {
                final FileChannel fileChannel = FileChannel.open(file.toPath(), StandardOpenOption.READ);
                getLogger().debug("Created FileChannel {} for {}", new Object[] { fileChannel, file });

                final Checksum checksum = new CRC32();
                final long position = file.length();
                final long timestamp = file.lastModified();

                try (final InputStream fis = new FileInputStream(file);
                     final CheckedInputStream in = new CheckedInputStream(fis, checksum)) {
                    StreamUtils.copy(in, new NullOutputStream(), position);
                }

                fileChannel.position(position);
                cleanup();
                tfo.setState(new TailFileState(filename, file, fileChannel, position, timestamp, file.length(), checksum, tfo.getState().getBuffer()));
            } catch (final IOException ioe) {
                getLogger().error("Attempted to position Reader at current position in file {} but failed to do so due to {}", new Object[] { file, ioe.toString() }, ioe);
                context.yield();
                return;
            }
        }

        tfo.setTailFileChanged(false);
    } else {
        // Recover any data that may have rolled over since the last time that this processor ran.
        // If expectedRecoveryChecksum != null, that indicates that this is the first iteration since processor was started, so use whatever checksum value
        // was present when the state was last persisted. In this case, we must then null out the value so that the next iteration won't keep using the "recovered"
        // value. If the value is null, then we know that either the processor has already recovered that data, or there was no state persisted. In either case,
        // use whatever checksum value is currently in the state.
        Long expectedChecksumValue = tfo.getExpectedRecoveryChecksum();
        if (expectedChecksumValue == null) {
            expectedChecksumValue = tfo.getState().getChecksum() == null ? null : tfo.getState().getChecksum().getValue();
        }

        rolloverOccurred = recoverRolledFiles(context, session, tailFile, expectedChecksumValue, tfo.getState().getTimestamp(), tfo.getState().getPosition());
        tfo.setExpectedRecoveryChecksum(null);
    }

    // initialize local variables from state object; this is done so that we can easily change the values throughout
    // the onTrigger method and then create a new state object after we finish processing the files.
    TailFileState state = tfo.getState();
    File file = state.getFile();
    FileChannel reader = state.getReader();
    Checksum checksum = state.getChecksum();
    if (checksum == null) {
        checksum = new CRC32();
    }
    long position = state.getPosition();
    long timestamp = state.getTimestamp();
    long length = state.getLength();

    // Create a reader if necessary.
    if (file == null || reader == null) {
        file = new File(tailFile);
        reader = createReader(file, position);
        if (reader == null) {
            context.yield();
            return;
        }
    }

    final long startNanos = System.nanoTime();

    // Check if file has rotated
    // We determine that the file has rotated if any of the following conditions are met:
    // 1. 'rolloverOccurred' == true, which indicates that we have found a new file matching the rollover pattern.
    // 2. The file was modified after the timestamp in our state, AND the file is smaller than we expected. This satisfies
    //    the case where we are tailing File A, and that file is then renamed (say to B) and a new file named A is created
    //    and is written to. In such a case, File A may have a file size smaller than we have in our state, so we know that
    //    it rolled over.
    // 3. The File Channel that we have indicates that the size of the file is different than file.length() indicates, AND
    //    the File Channel also indicates that we have read all data in the file. This case may also occur in the same scenario
    //    as #2, above. In this case, the File Channel is pointing to File A, but the 'file' object is pointing to File B. They
    //    both have the same name but are different files. As a result, once we have consumed all data from the File Channel,
    //    we want to roll over and consume data from the new file.
    boolean rotated = rolloverOccurred;
    if (!rotated) {
        final long fileLength = file.length();
        if (length > fileLength) {
            rotated = true;
        } else {
            try {
                final long readerSize = reader.size();
                final long readerPosition = reader.position();
                if (readerSize == readerPosition && readerSize != fileLength) {
                    rotated = true;
                }
            } catch (final IOException e) {
                getLogger().warn("Failed to determine the size or position of the File Channel when determining if the file has rolled over. Will assume that the file being tailed has not rolled over", e);
            }
        }
    }

    if (rotated) {
        // Since file has rotated, we close the reader, create a new one, and then reset our state.
        try {
            reader.close();
            getLogger().debug("Closed FileChannel {}", new Object[] { reader });
        } catch (final IOException ioe) {
            getLogger().warn("Failed to close reader for {} due to {}", new Object[] { file, ioe });
        }

        reader = createReader(file, 0L);
        position = 0L;
        checksum.reset();
    }

    if (file.length() == position || !file.exists()) {
        // no data to consume so rather than continually running, yield to allow other processors to use the thread.
        getLogger().debug("No data to consume; created no FlowFiles");
        tfo.setState(new TailFileState(tailFile, file, reader, position, timestamp, length, checksum, state.getBuffer()));
        persistState(tfo, context);
        context.yield();
        return;
    }

    // If there is data to consume, read as much as we can.
    final TailFileState currentState = state;
    final Checksum chksum = checksum;

    // data has been written to file. Stream it to a new FlowFile.
    FlowFile flowFile = session.create();

    final FileChannel fileReader = reader;
    final AtomicLong positionHolder = new AtomicLong(position);
    flowFile = session.write(flowFile, new OutputStreamCallback() {
        @Override
        public void process(final OutputStream rawOut) throws IOException {
            try (final OutputStream out = new BufferedOutputStream(rawOut)) {
                positionHolder.set(readLines(fileReader, currentState.getBuffer(), out, chksum));
            }
        }
    });

    // If there ended up being no data, just remove the FlowFile
    if (flowFile.getSize() == 0) {
        session.remove(flowFile);
        getLogger().debug("No data to consume; removed created FlowFile");
    } else {
        // determine filename for FlowFile by using <base filename of log file>.<initial offset>-<final offset>.<extension>
        final String tailFilename = file.getName();
        final String baseName = StringUtils.substringBeforeLast(tailFilename, ".");
        final String flowFileName;
        if (baseName.length() < tailFilename.length()) {
            flowFileName = baseName + "." + position + "-" + positionHolder.get() + "." + StringUtils.substringAfterLast(tailFilename, ".");
        } else {
            flowFileName = baseName + "." + position + "-" + positionHolder.get();
        }

        final Map<String, String> attributes = new HashMap<>(3);
        attributes.put(CoreAttributes.FILENAME.key(), flowFileName);
        attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
        attributes.put("tailfile.original.path", tailFile);
        flowFile = session.putAllAttributes(flowFile, attributes);

        session.getProvenanceReporter().receive(flowFile, file.toURI().toString(),
            "FlowFile contains bytes " + position + " through " + positionHolder.get() + " of source file",
            TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos));
        session.transfer(flowFile, REL_SUCCESS);
        position = positionHolder.get();

        // Set timestamp to the latest of when the file was modified and the current timestamp stored in the state.
        // We do this because when we read a file that has been rolled over, we set the state to 1 millisecond later than the last mod date
        // in order to avoid ingesting that file again. If we then read from this file during the same second (or millisecond, depending on the
        // operating system file last mod precision), then we could set the timestamp to a smaller value, which could result in reading in the
        // rotated file a second time.
        timestamp = Math.max(state.getTimestamp(), file.lastModified());
        length = file.length();
        getLogger().debug("Created {} and routed to success", new Object[] { flowFile });
    }

    // Create a new state object to represent our current position, timestamp, etc.
    tfo.setState(new TailFileState(tailFile, file, reader, position, timestamp, length, checksum, state.getBuffer()));

    // We must commit session before persisting state in order to avoid data loss on restart
    session.commit();
    persistState(tfo, context);
}
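Two details in processTailFile are worth highlighting. First, the callback wraps the raw stream in a BufferedOutputStream inside try-with-resources, so the many small writes made by readLines are buffered and reliably flushed before the callback returns. Second, session.commit() runs before persistState(), so a crash between the two can at worst re-deliver data rather than lose it. A sketch of the buffering pattern on its own; linesToWrite is an illustrative stand-in for the processor's readLines call.

flowFile = session.write(flowFile, new OutputStreamCallback() {
    @Override
    public void process(final OutputStream rawOut) throws IOException {
        // Buffer the small writes; try-with-resources guarantees the buffer is flushed on exit.
        try (final OutputStream out = new BufferedOutputStream(rawOut)) {
            for (final String line : linesToWrite) {
                out.write(line.getBytes(StandardCharsets.UTF_8));
            }
        }
    }
});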