Example 11 with BufferedInputStream

Use of org.apache.nifi.stream.io.BufferedInputStream in the apache/nifi project.

From the EvaluateXQuery class, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final List<FlowFile> flowFileBatch = session.get(50);
    if (flowFileBatch.isEmpty()) {
        return;
    }
    final ComponentLog logger = getLogger();
    final Map<String, XQueryExecutable> attributeToXQueryMap = new HashMap<>();
    final Processor proc = new Processor(false);
    final XMLReader xmlReader;
    try {
        xmlReader = XMLReaderFactory.createXMLReader();
    } catch (SAXException e) {
        logger.error("Error while constructing XMLReader {}", new Object[] { e });
        throw new ProcessException(e);
    }
    if (!context.getProperty(VALIDATE_DTD).asBoolean()) {
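        // With DTD validation turned off, resolve every external entity to an empty
        // document so the parser never fetches external DTDs while parsing content.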
        xmlReader.setEntityResolver(new EntityResolver() {

            @Override
            public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
                return new InputSource(new StringReader(""));
            }
        });
    }
    final XQueryCompiler comp = proc.newXQueryCompiler();
    for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
        if (!entry.getKey().isDynamic()) {
            continue;
        }
        final XQueryExecutable exp;
        try {
            exp = comp.compile(entry.getValue());
            attributeToXQueryMap.put(entry.getKey().getName(), exp);
        } catch (SaxonApiException e) {
            // should not happen because we've already validated the XQuery (in XQueryValidator)
            throw new ProcessException(e);
        }
    }
    final XQueryExecutable slashExpression;
    try {
        slashExpression = comp.compile("/");
    } catch (SaxonApiException e) {
        logger.error("unable to compile XQuery expression due to {}", new Object[] { e });
        session.transfer(flowFileBatch, REL_FAILURE);
        return;
    }
    final String destination = context.getProperty(DESTINATION).getValue();
    flowFileLoop: for (FlowFile flowFile : flowFileBatch) {
        if (!isScheduled()) {
            session.rollback();
            return;
        }
        final AtomicReference<Throwable> error = new AtomicReference<>(null);
        final AtomicReference<XdmNode> sourceRef = new AtomicReference<>(null);
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(final InputStream rawIn) throws IOException {
                try (final InputStream in = new BufferedInputStream(rawIn)) {
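                    // Run the trivial XQuery "/" once per FlowFile: its only purpose is to
                    // parse the content into a DOM that every dynamic XQuery can then reuse.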
                    XQueryEvaluator qe = slashExpression.load();
                    qe.setSource(new SAXSource(xmlReader, new InputSource(in)));
                    DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
                    dfactory.setNamespaceAware(true);
                    Document dom = dfactory.newDocumentBuilder().newDocument();
                    qe.run(new DOMDestination(dom));
                    XdmNode rootNode = proc.newDocumentBuilder().wrap(dom);
                    sourceRef.set(rootNode);
                } catch (final Exception e) {
                    error.set(e);
                }
            }
        });
        if (error.get() != null) {
            logger.error("unable to evaluate XQuery against {} due to {}; routing to 'failure'", new Object[] { flowFile, error.get() });
            session.transfer(flowFile, REL_FAILURE);
            continue;
        }
        final Map<String, String> xQueryResults = new HashMap<>();
        List<FlowFile> childrenFlowFiles = new ArrayList<>();
        for (final Map.Entry<String, XQueryExecutable> entry : attributeToXQueryMap.entrySet()) {
            try {
                XQueryEvaluator qe = entry.getValue().load();
                qe.setContextItem(sourceRef.get());
                XdmValue result = qe.evaluate();
                if (DESTINATION_ATTRIBUTE.equals(destination)) {
                    int index = 1;
                    for (XdmItem item : result) {
                        String value = formatItem(item, context);
                        String attributeName = entry.getKey();
                        if (result.size() > 1) {
                            attributeName += "." + index++;
                        }
                        xQueryResults.put(attributeName, value);
                    }
                } else {
                    // destination is DESTINATION_CONTENT
                    if (result.size() == 0) {
                        logger.info("Routing {} to 'unmatched'", new Object[] { flowFile });
                        session.transfer(flowFile, REL_NO_MATCH);
                        continue flowFileLoop;
                    } else if (result.size() == 1) {
                        final XdmItem item = result.itemAt(0);
                        flowFile = session.write(flowFile, new OutputStreamCallback() {

                            @Override
                            public void process(final OutputStream rawOut) throws IOException {
                                try (final OutputStream out = new BufferedOutputStream(rawOut)) {
                                    writeformattedItem(item, context, out);
                                } catch (TransformerFactoryConfigurationError | TransformerException e) {
                                    throw new IOException(e);
                                }
                            }
                        });
                    } else {
                        for (final XdmItem item : result) {
                            FlowFile ff = session.clone(flowFile);
                            ff = session.write(ff, new OutputStreamCallback() {

                                @Override
                                public void process(final OutputStream rawOut) throws IOException {
                                    try (final OutputStream out = new BufferedOutputStream(rawOut)) {
                                        try {
                                            writeformattedItem(item, context, out);
                                        } catch (TransformerFactoryConfigurationError | TransformerException e) {
                                            throw new IOException(e);
                                        }
                                    }
                                }
                            });
                            childrenFlowFiles.add(ff);
                        }
                    }
                }
            } catch (final SaxonApiException e) {
                logger.error("failed to evaluate XQuery for {} for Property {} due to {}; routing to failure", new Object[] { flowFile, entry.getKey(), e });
                session.transfer(flowFile, REL_FAILURE);
                session.remove(childrenFlowFiles);
                continue flowFileLoop;
            } catch (TransformerFactoryConfigurationError | TransformerException | IOException e) {
                logger.error("Failed to write XQuery result for {} due to {}; routing original to 'failure'", new Object[] { flowFile, error.get() });
                session.transfer(flowFile, REL_FAILURE);
                session.remove(childrenFlowFiles);
                continue flowFileLoop;
            }
        }
        if (DESTINATION_ATTRIBUTE.equals(destination)) {
            flowFile = session.putAllAttributes(flowFile, xQueryResults);
            final Relationship destRel = xQueryResults.isEmpty() ? REL_NO_MATCH : REL_MATCH;
            logger.info("Successfully evaluated XQueries against {} and found {} matches; routing to {}", new Object[] { flowFile, xQueryResults.size(), destRel.getName() });
            session.transfer(flowFile, destRel);
            session.getProvenanceReporter().modifyAttributes(flowFile);
        } else {
            // destination is DESTINATION_CONTENT
            if (!childrenFlowFiles.isEmpty()) {
                logger.info("Successfully created {} new FlowFiles from {}; routing all to 'matched'", new Object[] { childrenFlowFiles.size(), flowFile });
                session.transfer(childrenFlowFiles, REL_MATCH);
                session.remove(flowFile);
            } else {
                logger.info("Successfully updated content for {}; routing to 'matched'", new Object[] { flowFile });
                session.transfer(flowFile, REL_MATCH);
                session.getProvenanceReporter().modifyContent(flowFile);
            }
        }
    }
// end flowFileLoop
}
Also used: InputSource (org.xml.sax.InputSource), Processor (net.sf.saxon.s9api.Processor), AbstractProcessor (org.apache.nifi.processor.AbstractProcessor), DocumentBuilderFactory (javax.xml.parsers.DocumentBuilderFactory), HashMap (java.util.HashMap), XQueryCompiler (net.sf.saxon.s9api.XQueryCompiler), BufferedOutputStream (org.apache.nifi.stream.io.BufferedOutputStream), ByteArrayOutputStream (java.io.ByteArrayOutputStream), OutputStream (java.io.OutputStream), Document (org.w3c.dom.Document), SAXException (org.xml.sax.SAXException), XdmValue (net.sf.saxon.s9api.XdmValue), BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream), StringReader (java.io.StringReader), List (java.util.List), ArrayList (java.util.ArrayList), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback), XMLReader (org.xml.sax.XMLReader), TransformerException (javax.xml.transform.TransformerException), TransformerFactoryConfigurationError (javax.xml.transform.TransformerFactoryConfigurationError), TransformerConfigurationException (javax.xml.transform.TransformerConfigurationException), FlowFile (org.apache.nifi.flowfile.FlowFile), PropertyDescriptor (org.apache.nifi.components.PropertyDescriptor), InputStream (java.io.InputStream), AtomicReference (java.util.concurrent.atomic.AtomicReference), EntityResolver (org.xml.sax.EntityResolver), IOException (java.io.IOException), ComponentLog (org.apache.nifi.logging.ComponentLog), SaxonApiException (net.sf.saxon.s9api.SaxonApiException), XdmNode (net.sf.saxon.s9api.XdmNode), ProcessException (org.apache.nifi.processor.exception.ProcessException), XQueryExecutable (net.sf.saxon.s9api.XQueryExecutable), SAXSource (javax.xml.transform.sax.SAXSource), Relationship (org.apache.nifi.processor.Relationship), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), DOMDestination (net.sf.saxon.s9api.DOMDestination), Map (java.util.Map), XQueryEvaluator (net.sf.saxon.s9api.XQueryEvaluator), XdmItem (net.sf.saxon.s9api.XdmItem)
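A side note on the entity resolver in the example above: org.xml.sax.EntityResolver declares a single abstract method, so on Java 8+ the anonymous class can be collapsed into a lambda. A minimal, behavior-identical sketch:

if (!context.getProperty(VALIDATE_DTD).asBoolean()) {
    xmlReader.setEntityResolver((publicId, systemId) -> new InputSource(new StringReader("")));
}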

Example 12 with BufferedInputStream

Use of org.apache.nifi.stream.io.BufferedInputStream in the apache/nifi project.

From the ScanContent class, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLogger();
    final SynchronousFileWatcher fileWatcher = fileWatcherRef.get();
    try {
        if (fileWatcher.checkAndReset()) {
            reloadDictionary(context, true, logger);
        }
    } catch (final IOException e) {
        throw new ProcessException(e);
    }
    Search<byte[]> search = searchRef.get();
    try {
        if (search == null) {
            if (reloadDictionary(context, false, logger)) {
                search = searchRef.get();
            }
        }
    } catch (final IOException e) {
        throw new ProcessException(e);
    }
    if (search == null) {
        return;
    }
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final Search<byte[]> finalSearch = search;
    final AtomicReference<SearchTerm<byte[]>> termRef = new AtomicReference<>(null);
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream rawIn) throws IOException {
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                final SearchState<byte[]> searchResult = finalSearch.search(in, false);
                if (searchResult.foundMatch()) {
                    termRef.set(searchResult.getResults().keySet().iterator().next());
                }
            }
        }
    });
    final SearchTerm<byte[]> matchingTerm = termRef.get();
    if (matchingTerm == null) {
        logger.info("Routing {} to 'unmatched'", new Object[] { flowFile });
        session.getProvenanceReporter().route(flowFile, REL_NO_MATCH);
        session.transfer(flowFile, REL_NO_MATCH);
    } else {
        final String matchingTermString = matchingTerm.toString(UTF8);
        logger.info("Routing {} to 'matched' because it matched term {}", new Object[] { flowFile, matchingTermString });
        flowFile = session.putAttribute(flowFile, MATCH_ATTRIBUTE_KEY, matchingTermString);
        session.getProvenanceReporter().route(flowFile, REL_MATCH);
        session.transfer(flowFile, REL_MATCH);
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), SynchronousFileWatcher (org.apache.nifi.util.file.monitor.SynchronousFileWatcher), DataInputStream (java.io.DataInputStream), BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream), InputStream (java.io.InputStream), AtomicReference (java.util.concurrent.atomic.AtomicReference), IOException (java.io.IOException), SearchTerm (org.apache.nifi.util.search.SearchTerm), ComponentLog (org.apache.nifi.logging.ComponentLog), ProcessException (org.apache.nifi.processor.exception.ProcessException), SearchState (org.apache.nifi.util.search.ahocorasick.SearchState), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback)
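The AtomicReference holder seen above is a common way to get a result out of an InputStreamCallback, whose process method returns void. A minimal sketch of the idiom as it would appear inside onTrigger; readFirstMatch is a hypothetical helper standing in for the actual search logic:

final AtomicReference<String> resultRef = new AtomicReference<>(null);
session.read(flowFile, new InputStreamCallback() {

    @Override
    public void process(final InputStream rawIn) throws IOException {
        try (final InputStream in = new BufferedInputStream(rawIn)) {
            // Any value computed from the stream escapes the void-returning
            // callback through the effectively-final holder.
            resultRef.set(readFirstMatch(in));
        }
    }
});
final String firstMatch = resultRef.get();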

Example 13 with BufferedInputStream

Use of org.apache.nifi.stream.io.BufferedInputStream in the apache/nifi project.

From the SplitContent class, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    final boolean keepSequence = context.getProperty(KEEP_SEQUENCE).asBoolean();
    final boolean keepTrailingSequence;
    final boolean keepLeadingSequence;
    if (keepSequence) {
        if (context.getProperty(BYTE_SEQUENCE_LOCATION).getValue().equals(TRAILING_POSITION.getValue())) {
            keepTrailingSequence = true;
            keepLeadingSequence = false;
        } else {
            keepTrailingSequence = false;
            keepLeadingSequence = true;
        }
    } else {
        keepTrailingSequence = false;
        keepLeadingSequence = false;
    }
    final byte[] byteSequence = this.byteSequence.get();
    if (byteSequence == null) {
        // should never happen. But just in case...
        logger.error("{} Unable to obtain Byte Sequence", new Object[] { this });
        session.rollback();
        return;
    }
    final List<Tuple<Long, Long>> splits = new ArrayList<>();
    final NaiveSearchRingBuffer buffer = new NaiveSearchRingBuffer(byteSequence);
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream rawIn) throws IOException {
            long bytesRead = 0L;
            long startOffset = 0L;
            try (final InputStream in = new BufferedInputStream(rawIn)) {
                while (true) {
                    final int nextByte = in.read();
                    if (nextByte == -1) {
                        return;
                    }
                    bytesRead++;
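                    // addAndCompare pushes the byte into the ring buffer and reports whether
                    // the bytes seen so far now end with the configured byte sequence.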
                    boolean matched = buffer.addAndCompare((byte) (nextByte & 0xFF));
                    if (matched) {
                        long splitLength;
                        if (keepTrailingSequence) {
                            splitLength = bytesRead - startOffset;
                        } else {
                            splitLength = bytesRead - startOffset - byteSequence.length;
                        }
                        if (keepLeadingSequence && startOffset > 0) {
                            splitLength += byteSequence.length;
                        }
                        final long splitStart = (keepLeadingSequence && startOffset > 0) ? startOffset - byteSequence.length : startOffset;
                        splits.add(new Tuple<>(splitStart, splitLength));
                        startOffset = bytesRead;
                        buffer.clear();
                    }
                }
            }
        }
    });
    long lastOffsetPlusSize = -1L;
    if (splits.isEmpty()) {
        FlowFile clone = session.clone(flowFile);
        session.transfer(flowFile, REL_ORIGINAL);
        session.transfer(clone, REL_SPLITS);
        logger.info("Found no match for {}; transferring original 'original' and transferring clone {} to 'splits'", new Object[] { flowFile, clone });
        return;
    }
    final ArrayList<FlowFile> splitList = new ArrayList<>();
    for (final Tuple<Long, Long> tuple : splits) {
        long offset = tuple.getKey();
        long size = tuple.getValue();
        if (size > 0) {
            FlowFile split = session.clone(flowFile, offset, size);
            splitList.add(split);
        }
        lastOffsetPlusSize = offset + size;
    }
    // lastOffsetPlusSize indicates the ending position of the last split.
    // if the data didn't end with the byte sequence, we need one final split to run from the end
    // of the last split to the end of the content.
    long finalSplitOffset = lastOffsetPlusSize;
    if (!keepTrailingSequence && !keepLeadingSequence) {
        finalSplitOffset += byteSequence.length;
    }
    if (finalSplitOffset > -1L && finalSplitOffset < flowFile.getSize()) {
        FlowFile finalSplit = session.clone(flowFile, finalSplitOffset, flowFile.getSize() - finalSplitOffset);
        splitList.add(finalSplit);
    }
    final String fragmentId = finishFragmentAttributes(session, flowFile, splitList);
    session.transfer(splitList, REL_SPLITS);
    flowFile = FragmentAttributes.copyAttributesToOriginal(session, flowFile, fragmentId, splitList.size());
    session.transfer(flowFile, REL_ORIGINAL);
    if (splitList.size() > 10) {
        logger.info("Split {} into {} files", new Object[] { flowFile, splitList.size() });
    } else {
        logger.info("Split {} into {} files: {}", new Object[] { flowFile, splitList.size(), splitList });
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream), InputStream (java.io.InputStream), ArrayList (java.util.ArrayList), IOException (java.io.IOException), ComponentLog (org.apache.nifi.logging.ComponentLog), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), NaiveSearchRingBuffer (org.apache.nifi.util.NaiveSearchRingBuffer), Tuple (org.apache.nifi.util.Tuple)
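To make the offset bookkeeping concrete, here is a small standalone sketch (plain Java, not NiFi code) that applies the same arithmetic to the 13-byte content one|two|three with byte sequence | and both keep-sequence flags off. The matcher below is a deliberate simplification of NaiveSearchRingBuffer and is only correct because the sequence is one byte long; the program prints splits (0, 3) and (4, 3), then a final tail split (8, 5), i.e. "one", "two", "three":

import java.util.ArrayList;
import java.util.List;

public class SplitMathDemo {

    public static void main(String[] args) {
        final byte[] content = "one|two|three".getBytes();
        // A single-byte separator keeps the matcher trivial; SplitContent itself
        // uses a NaiveSearchRingBuffer to match multi-byte sequences.
        final byte[] byteSequence = "|".getBytes();
        final boolean keepTrailingSequence = false;
        final boolean keepLeadingSequence = false;

        final List<long[]> splits = new ArrayList<>();
        long bytesRead = 0L;
        long startOffset = 0L;
        for (final byte b : content) {
            bytesRead++;
            if (b == byteSequence[0]) { // matcher simplified to a single-byte compare
                long splitLength = keepTrailingSequence
                        ? bytesRead - startOffset
                        : bytesRead - startOffset - byteSequence.length;
                if (keepLeadingSequence && startOffset > 0) {
                    splitLength += byteSequence.length;
                }
                final long splitStart = (keepLeadingSequence && startOffset > 0)
                        ? startOffset - byteSequence.length
                        : startOffset;
                splits.add(new long[] { splitStart, splitLength });
                startOffset = bytesRead;
            }
        }

        long lastOffsetPlusSize = -1L;
        for (final long[] split : splits) {
            System.out.println("split at offset " + split[0] + ", length " + split[1]);
            lastOffsetPlusSize = split[0] + split[1];
        }
        // If the content did not end with the byte sequence, emit one final tail split
        // running from the end of the last split to the end of the content.
        long finalSplitOffset = lastOffsetPlusSize;
        if (!keepTrailingSequence && !keepLeadingSequence) {
            finalSplitOffset += byteSequence.length;
        }
        if (finalSplitOffset > -1L && finalSplitOffset < content.length) {
            System.out.println("final split at offset " + finalSplitOffset
                    + ", length " + (content.length - finalSplitOffset));
        }
    }
}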

Example 14 with BufferedInputStream

Use of org.apache.nifi.stream.io.BufferedInputStream in the apache/nifi project.

From the TarUnpackerSequenceFileWriter class, method processInputStream:

@Override
protected void processInputStream(final InputStream stream, final FlowFile tarArchivedFlowFile, final Writer writer) throws IOException {
    try (final TarArchiveInputStream tarIn = new TarArchiveInputStream(new BufferedInputStream(stream))) {
        TarArchiveEntry tarEntry;
        while ((tarEntry = tarIn.getNextTarEntry()) != null) {
            if (tarEntry.isDirectory()) {
                continue;
            }
            final String key = tarEntry.getName();
            final long fileSize = tarEntry.getSize();
            final InputStreamWritable inStreamWritable = new InputStreamWritable(tarIn, (int) fileSize);
            writer.append(new Text(key), inStreamWritable);
            logger.debug("Appending FlowFile {} to Sequence File", new Object[] { key });
        }
    }
}
Also used: TarArchiveInputStream (org.apache.commons.compress.archivers.tar.TarArchiveInputStream), InputStreamWritable (org.apache.nifi.processors.hadoop.util.InputStreamWritable), BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream), Text (org.apache.hadoop.io.Text), TarArchiveEntry (org.apache.commons.compress.archivers.tar.TarArchiveEntry)
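One easy way to exercise processInputStream is to hand it a tar stream built with the same commons-compress library. A minimal sketch of such a fixture follows; the class name, entry name, and payload are arbitrary test values:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;

public class TarFixture {

    // Builds an in-memory tar archive containing a single entry named test.txt.
    public static InputStream singleEntryTar() throws IOException {
        final byte[] payload = "hello".getBytes(StandardCharsets.UTF_8);
        final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (final TarArchiveOutputStream tarOut = new TarArchiveOutputStream(bytes)) {
            final TarArchiveEntry entry = new TarArchiveEntry("test.txt");
            entry.setSize(payload.length); // the size must be set before the entry is written
            tarOut.putArchiveEntry(entry);
            tarOut.write(payload);
            tarOut.closeArchiveEntry();
        }
        return new ByteArrayInputStream(bytes.toByteArray());
    }
}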

Aggregations

BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream): 14
IOException (java.io.IOException): 12
InputStream (java.io.InputStream): 12
FlowFile (org.apache.nifi.flowfile.FlowFile): 11
ComponentLog (org.apache.nifi.logging.ComponentLog): 10
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 10
OutputStream (java.io.OutputStream): 6
ArrayList (java.util.ArrayList): 6
AtomicReference (java.util.concurrent.atomic.AtomicReference): 6
ProcessException (org.apache.nifi.processor.exception.ProcessException): 6
HashMap (java.util.HashMap): 5
Map (java.util.Map): 4
OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback): 4
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 3
Properties (java.util.Properties): 3
BufferedOutputStream (org.apache.nifi.stream.io.BufferedOutputStream): 3
File (java.io.File): 2
StringReader (java.io.StringReader): 2
Date (java.util.Date): 2
List (java.util.List): 2