Use of org.apache.nifi.stream.io.BufferedInputStream in project nifi by apache: the class EvaluateXQuery, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
final List<FlowFile> flowFileBatch = session.get(50);
if (flowFileBatch.isEmpty()) {
return;
}
final ComponentLog logger = getLogger();
final Map<String, XQueryExecutable> attributeToXQueryMap = new HashMap<>();
final Processor proc = new Processor(false);
final XMLReader xmlReader;
try {
xmlReader = XMLReaderFactory.createXMLReader();
} catch (SAXException e) {
logger.error("Error while constructing XMLReader {}", new Object[] { e });
throw new ProcessException(e.getMessage());
}
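// When DTD validation is disabled, resolve every external entity to an empty document so no external DTD is fetched or parsed.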
if (!context.getProperty(VALIDATE_DTD).asBoolean()) {
xmlReader.setEntityResolver(new EntityResolver() {
@Override
public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
return new InputSource(new StringReader(""));
}
});
}
final XQueryCompiler comp = proc.newXQueryCompiler();
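// Compile each dynamic property value as an XQuery; the property name becomes the name of the destination attribute.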
for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
if (!entry.getKey().isDynamic()) {
continue;
}
final XQueryExecutable exp;
try {
exp = comp.compile(entry.getValue());
attributeToXQueryMap.put(entry.getKey().getName(), exp);
} catch (SaxonApiException e) {
// should not happen because we've already validated the XQuery (in XQueryValidator)
throw new ProcessException(e);
}
}
final XQueryExecutable slashExpression;
try {
slashExpression = comp.compile("/");
} catch (SaxonApiException e) {
logger.error("unable to compile XQuery expression due to {}", new Object[] { e });
session.transfer(flowFileBatch, REL_FAILURE);
return;
}
final String destination = context.getProperty(DESTINATION).getValue();
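// Process each FlowFile in the batch, rolling back cleanly if the processor is unscheduled mid-batch.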
flowFileLoop: for (FlowFile flowFile : flowFileBatch) {
if (!isScheduled()) {
session.rollback();
return;
}
final AtomicReference<Throwable> error = new AtomicReference<>(null);
final AtomicReference<XdmNode> sourceRef = new AtomicReference<>(null);
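// Parse the FlowFile content once, via the identity expression "/", into an XdmNode that serves as the context item for every XQuery.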
session.read(flowFile, new InputStreamCallback() {
@Override
public void process(final InputStream rawIn) throws IOException {
try (final InputStream in = new BufferedInputStream(rawIn)) {
XQueryEvaluator qe = slashExpression.load();
qe.setSource(new SAXSource(xmlReader, new InputSource(in)));
DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
dfactory.setNamespaceAware(true);
Document dom = dfactory.newDocumentBuilder().newDocument();
qe.run(new DOMDestination(dom));
XdmNode rootNode = proc.newDocumentBuilder().wrap(dom);
sourceRef.set(rootNode);
} catch (final Exception e) {
error.set(e);
}
}
});
if (error.get() != null) {
logger.error("unable to evaluate XQuery against {} due to {}; routing to 'failure'", new Object[] { flowFile, error.get() });
session.transfer(flowFile, REL_FAILURE);
continue;
}
final Map<String, String> xQueryResults = new HashMap<>();
List<FlowFile> childrenFlowFiles = new ArrayList<>();
for (final Map.Entry<String, XQueryExecutable> entry : attributeToXQueryMap.entrySet()) {
try {
XQueryEvaluator qe = entry.getValue().load();
qe.setContextItem(sourceRef.get());
XdmValue result = qe.evaluate();
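// Attribute destination: write each result item to an attribute, appending ".<index>" when there is more than one item.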
if (DESTINATION_ATTRIBUTE.equals(destination)) {
int index = 1;
for (XdmItem item : result) {
String value = formatItem(item, context);
String attributeName = entry.getKey();
if (result.size() > 1) {
attributeName += "." + index++;
}
xQueryResults.put(attributeName, value);
}
} else {
// if (DESTINATION_CONTENT.equals(destination)){
if (result.size() == 0) {
logger.info("Routing {} to 'unmatched'", new Object[] { flowFile });
session.transfer(flowFile, REL_NO_MATCH);
continue flowFileLoop;
} else if (result.size() == 1) {
final XdmItem item = result.itemAt(0);
flowFile = session.write(flowFile, new OutputStreamCallback() {
@Override
public void process(final OutputStream rawOut) throws IOException {
try (final OutputStream out = new BufferedOutputStream(rawOut)) {
writeformattedItem(item, context, out);
} catch (TransformerFactoryConfigurationError | TransformerException e) {
throw new IOException(e);
}
}
});
} else {
for (final XdmItem item : result) {
FlowFile ff = session.clone(flowFile);
ff = session.write(ff, new OutputStreamCallback() {
@Override
public void process(final OutputStream rawOut) throws IOException {
try (final OutputStream out = new BufferedOutputStream(rawOut)) {
try {
writeformattedItem(item, context, out);
} catch (TransformerFactoryConfigurationError | TransformerException e) {
throw new IOException(e);
}
}
}
});
childrenFlowFiles.add(ff);
}
}
}
} catch (final SaxonApiException e) {
logger.error("failed to evaluate XQuery for {} for Property {} due to {}; routing to failure", new Object[] { flowFile, entry.getKey(), e });
session.transfer(flowFile, REL_FAILURE);
session.remove(childrenFlowFiles);
continue flowFileLoop;
} catch (TransformerFactoryConfigurationError | TransformerException | IOException e) {
logger.error("Failed to write XQuery result for {} due to {}; routing original to 'failure'", new Object[] { flowFile, error.get() });
session.transfer(flowFile, REL_FAILURE);
session.remove(childrenFlowFiles);
continue flowFileLoop;
}
}
if (DESTINATION_ATTRIBUTE.equals(destination)) {
flowFile = session.putAllAttributes(flowFile, xQueryResults);
final Relationship destRel = xQueryResults.isEmpty() ? REL_NO_MATCH : REL_MATCH;
logger.info("Successfully evaluated XQueries against {} and found {} matches; routing to {}", new Object[] { flowFile, xQueryResults.size(), destRel.getName() });
session.transfer(flowFile, destRel);
session.getProvenanceReporter().modifyAttributes(flowFile);
} else {
// if (DESTINATION_CONTENT.equals(destination)) {
if (!childrenFlowFiles.isEmpty()) {
logger.info("Successfully created {} new FlowFiles from {}; routing all to 'matched'", new Object[] { childrenFlowFiles.size(), flowFile });
session.transfer(childrenFlowFiles, REL_MATCH);
session.remove(flowFile);
} else {
logger.info("Successfully updated content for {}; routing to 'matched'", new Object[] { flowFile });
session.transfer(flowFile, REL_MATCH);
session.getProvenanceReporter().modifyContent(flowFile);
}
}
} // end flowFileLoop
}
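A minimal sketch of exercising the dynamic-property behaviour above with the nifi-mock TestRunner; the dynamic property name, sample XML, and XQuery are illustrative assumptions rather than values taken from the processor's source, and the usual imports are omitted to match the snippets above.
// Illustrative test sketch; assumes the standard nifi-mock API.
final TestRunner runner = TestRunners.newTestRunner(new EvaluateXQuery());
runner.setProperty(EvaluateXQuery.DESTINATION, EvaluateXQuery.DESTINATION_ATTRIBUTE);
// Each dynamic property is compiled to an XQuery whose result is written to an attribute of the same name.
runner.setProperty("book.title", "//book/title/text()");
runner.enqueue("<book><title>NiFi in Action</title></book>".getBytes(StandardCharsets.UTF_8));
runner.run();
runner.assertAllFlowFilesTransferred(EvaluateXQuery.REL_MATCH, 1);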
Use of org.apache.nifi.stream.io.BufferedInputStream in project nifi by apache: the class ScanContent, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
final ComponentLog logger = getLogger();
final SynchronousFileWatcher fileWatcher = fileWatcherRef.get();
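// Reload the search dictionary if the dictionary file has changed since it was last read.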
try {
if (fileWatcher.checkAndReset()) {
reloadDictionary(context, true, logger);
}
} catch (final IOException e) {
throw new ProcessException(e);
}
Search<byte[]> search = searchRef.get();
try {
if (search == null) {
if (reloadDictionary(context, false, logger)) {
search = searchRef.get();
}
}
} catch (final IOException e) {
throw new ProcessException(e);
}
if (search == null) {
return;
}
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final Search<byte[]> finalSearch = search;
final AtomicReference<SearchTerm<byte[]>> termRef = new AtomicReference<>(null);
termRef.set(null);
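// Stream the content through the search, capturing the first matching term, if any.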
session.read(flowFile, new InputStreamCallback() {
@Override
public void process(final InputStream rawIn) throws IOException {
try (final InputStream in = new BufferedInputStream(rawIn)) {
final SearchState<byte[]> searchResult = finalSearch.search(in, false);
if (searchResult.foundMatch()) {
termRef.set(searchResult.getResults().keySet().iterator().next());
}
}
}
});
final SearchTerm<byte[]> matchingTerm = termRef.get();
if (matchingTerm == null) {
logger.info("Routing {} to 'unmatched'", new Object[] { flowFile });
session.getProvenanceReporter().route(flowFile, REL_NO_MATCH);
session.transfer(flowFile, REL_NO_MATCH);
} else {
final String matchingTermString = matchingTerm.toString(UTF8);
logger.info("Routing {} to 'matched' because it matched term {}", new Object[] { flowFile, matchingTermString });
flowFile = session.putAttribute(flowFile, MATCH_ATTRIBUTE_KEY, matchingTermString);
session.getProvenanceReporter().route(flowFile, REL_MATCH);
session.transfer(flowFile, REL_MATCH);
}
}
Use of org.apache.nifi.stream.io.BufferedInputStream in project nifi by apache: the class SplitContent, method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
FlowFile flowFile = session.get();
if (flowFile == null) {
return;
}
final ComponentLog logger = getLogger();
final boolean keepSequence = context.getProperty(KEEP_SEQUENCE).asBoolean();
final boolean keepTrailingSequence;
final boolean keepLeadingSequence;
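// When the byte sequence is kept, it is attached either to the end of the preceding split (trailing) or to the start of the following split (leading).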
if (keepSequence) {
if (context.getProperty(BYTE_SEQUENCE_LOCATION).getValue().equals(TRAILING_POSITION.getValue())) {
keepTrailingSequence = true;
keepLeadingSequence = false;
} else {
keepTrailingSequence = false;
keepLeadingSequence = true;
}
} else {
keepTrailingSequence = false;
keepLeadingSequence = false;
}
final byte[] byteSequence = this.byteSequence.get();
if (byteSequence == null) {
// should never happen. But just in case...
logger.error("{} Unable to obtain Byte Sequence", new Object[] { this });
session.rollback();
return;
}
final List<Tuple<Long, Long>> splits = new ArrayList<>();
final NaiveSearchRingBuffer buffer = new NaiveSearchRingBuffer(byteSequence);
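// Single pass over the content: record an (offset, length) tuple for each segment delimited by the byte sequence.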
session.read(flowFile, new InputStreamCallback() {
@Override
public void process(final InputStream rawIn) throws IOException {
long bytesRead = 0L;
long startOffset = 0L;
try (final InputStream in = new BufferedInputStream(rawIn)) {
while (true) {
final int nextByte = in.read();
if (nextByte == -1) {
return;
}
bytesRead++;
boolean matched = buffer.addAndCompare((byte) (nextByte & 0xFF));
if (matched) {
long splitLength;
if (keepTrailingSequence) {
splitLength = bytesRead - startOffset;
} else {
splitLength = bytesRead - startOffset - byteSequence.length;
}
if (keepLeadingSequence && startOffset > 0) {
splitLength += byteSequence.length;
}
final long splitStart = (keepLeadingSequence && startOffset > 0) ? startOffset - byteSequence.length : startOffset;
splits.add(new Tuple<>(splitStart, splitLength));
startOffset = bytesRead;
buffer.clear();
}
}
}
}
});
long lastOffsetPlusSize = -1L;
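// No delimiter was found: route the original to 'original' and an identical clone to 'splits'.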
if (splits.isEmpty()) {
FlowFile clone = session.clone(flowFile);
session.transfer(flowFile, REL_ORIGINAL);
session.transfer(clone, REL_SPLITS);
logger.info("Found no match for {}; transferring original 'original' and transferring clone {} to 'splits'", new Object[] { flowFile, clone });
return;
}
final ArrayList<FlowFile> splitList = new ArrayList<>();
for (final Tuple<Long, Long> tuple : splits) {
long offset = tuple.getKey();
long size = tuple.getValue();
if (size > 0) {
FlowFile split = session.clone(flowFile, offset, size);
splitList.add(split);
}
lastOffsetPlusSize = offset + size;
}
// lastOffsetPlusSize indicates the ending position of the last split.
// if the data didn't end with the byte sequence, we need one final split to run from the end
// of the last split to the end of the content.
long finalSplitOffset = lastOffsetPlusSize;
if (!keepTrailingSequence && !keepLeadingSequence) {
finalSplitOffset += byteSequence.length;
}
if (finalSplitOffset > -1L && finalSplitOffset < flowFile.getSize()) {
FlowFile finalSplit = session.clone(flowFile, finalSplitOffset, flowFile.getSize() - finalSplitOffset);
splitList.add(finalSplit);
}
final String fragmentId = finishFragmentAttributes(session, flowFile, splitList);
session.transfer(splitList, REL_SPLITS);
flowFile = FragmentAttributes.copyAttributesToOriginal(session, flowFile, fragmentId, splitList.size());
session.transfer(flowFile, REL_ORIGINAL);
if (splitList.size() > 10) {
logger.info("Split {} into {} files", new Object[] { flowFile, splitList.size() });
} else {
logger.info("Split {} into {} files: {}", new Object[] { flowFile, splitList.size(), splitList });
}
}
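As an illustrative worked example (not taken from the source): splitting the 11-byte content "abc|def|ghi" on the one-byte sequence "|" with Keep Byte Sequence disabled records splits (offset 0, length 3) and (offset 4, length 3) during the read, so lastOffsetPlusSize ends at 7; the final split offset becomes 8 after adding the sequence length, and since 8 is less than the 11-byte content size a final split of length 3 is added, yielding children "abc", "def", and "ghi".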
Use of org.apache.nifi.stream.io.BufferedInputStream in project nifi by apache: the class TarUnpackerSequenceFileWriter, method processInputStream.
@Override
protected void processInputStream(final InputStream stream, final FlowFile tarArchivedFlowFile, final Writer writer) throws IOException {
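// Iterate the tar entries, appending each regular file to the SequenceFile keyed by its entry name; directory entries are skipped.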
try (final TarArchiveInputStream tarIn = new TarArchiveInputStream(new BufferedInputStream(stream))) {
TarArchiveEntry tarEntry;
while ((tarEntry = tarIn.getNextTarEntry()) != null) {
if (tarEntry.isDirectory()) {
continue;
}
final String key = tarEntry.getName();
final long fileSize = tarEntry.getSize();
final InputStreamWritable inStreamWritable = new InputStreamWritable(tarIn, (int) fileSize);
writer.append(new Text(key), inStreamWritable);
logger.debug("Appending FlowFile {} to Sequence File", new Object[] { key });
}
}
}