
Example 16 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in the Apache NiFi project.

From the class PutSQL, method getSQL:

/**
 * Determines the SQL statement that should be executed for the given FlowFile
 *
 * @param session the session that can be used to access the given FlowFile
 * @param flowFile the FlowFile whose SQL statement should be executed
 *
 * @return the SQL that is associated with the given FlowFile
 */
private String getSQL(final ProcessSession session, final FlowFile flowFile) {
    // Read the SQL from the FlowFile's content
    final byte[] buffer = new byte[(int) flowFile.getSize()];
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, buffer);
        }
    });
    // Interpret the FlowFile content as a UTF-8 encoded SQL statement
    final String sql = new String(buffer, StandardCharsets.UTF_8);
    return sql;
}
Also used: InputStream (java.io.InputStream), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), IOException (java.io.IOException)
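
Since InputStreamCallback declares a single abstract method, the same read is often written as a lambda on Java 8+. The sketch below is a minimal, hypothetical helper (the class and method names are invented and are not part of PutSQL) that performs the identical buffer fill and UTF-8 decode:

import java.nio.charset.StandardCharsets;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.stream.io.StreamUtils;

// Hypothetical helper, not part of PutSQL: the same buffer fill written as a lambda.
public final class FlowFileContentReader {

    // Reads the entire FlowFile content into memory and decodes it as UTF-8.
    // Only appropriate for small payloads such as SQL statements.
    public static String readContentAsString(final ProcessSession session, final FlowFile flowFile) {
        final byte[] buffer = new byte[(int) flowFile.getSize()];
        // InputStreamCallback has a single abstract method, so a lambda can
        // replace the anonymous class shown above
        session.read(flowFile, in -> StreamUtils.fillBuffer(in, buffer));
        return new String(buffer, StandardCharsets.UTF_8);
    }

    private FlowFileContentReader() {
    }
}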

Example 17 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in the Apache NiFi project.

From the class SplitRecord, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile original = session.get();
    if (original == null) {
        return;
    }
    final RecordReaderFactory readerFactory = context.getProperty(RECORD_READER).asControllerService(RecordReaderFactory.class);
    final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).asControllerService(RecordSetWriterFactory.class);
    final int maxRecords = context.getProperty(RECORDS_PER_SPLIT).evaluateAttributeExpressions(original).asInteger();
    final List<FlowFile> splits = new ArrayList<>();
    final Map<String, String> originalAttributes = original.getAttributes();
    try {
        session.read(original, new InputStreamCallback() {

            @Override
            public void process(final InputStream in) throws IOException {
                try (final RecordReader reader = readerFactory.createRecordReader(originalAttributes, in, getLogger())) {
                    final RecordSchema schema = writerFactory.getSchema(originalAttributes, reader.getSchema());
                    final RecordSet recordSet = reader.createRecordSet();
                    final PushBackRecordSet pushbackSet = new PushBackRecordSet(recordSet);
                    while (pushbackSet.isAnotherRecord()) {
                        FlowFile split = session.create(original);
                        try {
                            final Map<String, String> attributes = new HashMap<>();
                            final WriteResult writeResult;
                            try (final OutputStream out = session.write(split);
                                final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out)) {
                                if (maxRecords == 1) {
                                    final Record record = pushbackSet.next();
                                    writeResult = writer.write(record);
                                } else {
                                    final RecordSet limitedSet = pushbackSet.limit(maxRecords);
                                    writeResult = writer.write(limitedSet);
                                }
                                attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
                                attributes.put(CoreAttributes.MIME_TYPE.key(), writer.getMimeType());
                                attributes.putAll(writeResult.getAttributes());
                                session.adjustCounter("Records Split", writeResult.getRecordCount(), false);
                            }
                            split = session.putAllAttributes(split, attributes);
                        } finally {
                            splits.add(split);
                        }
                    }
                } catch (final SchemaNotFoundException | MalformedRecordException e) {
                    throw new ProcessException("Failed to parse incoming data", e);
                }
            }
        });
    } catch (final ProcessException pe) {
        getLogger().error("Failed to split {}", new Object[] { original, pe });
        session.remove(splits);
        session.transfer(original, REL_FAILURE);
        return;
    }
    session.transfer(original, REL_ORIGINAL);
    session.transfer(splits, REL_SPLITS);
    getLogger().info("Successfully split {} into {} FlowFiles, each containing up to {} records", new Object[] { original, splits.size(), maxRecords });
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), InputStream (java.io.InputStream), RecordReader (org.apache.nifi.serialization.RecordReader), OutputStream (java.io.OutputStream), ArrayList (java.util.ArrayList), IOException (java.io.IOException), RecordSetWriter (org.apache.nifi.serialization.RecordSetWriter), RecordReaderFactory (org.apache.nifi.serialization.RecordReaderFactory), ProcessException (org.apache.nifi.processor.exception.ProcessException), WriteResult (org.apache.nifi.serialization.WriteResult), RecordSetWriterFactory (org.apache.nifi.serialization.RecordSetWriterFactory), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), Record (org.apache.nifi.serialization.record.Record), PushBackRecordSet (org.apache.nifi.serialization.record.PushBackRecordSet), RecordSet (org.apache.nifi.serialization.record.RecordSet), RecordSchema (org.apache.nifi.serialization.record.RecordSchema), HashMap (java.util.HashMap), Map (java.util.Map)
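
A notable detail in this example is that session.write(split) returns an OutputStream that is written while the parent FlowFile is still being read inside the InputStreamCallback, so all splits are produced in a single pass over the content. The following sketch is a hypothetical helper (invented class, method, and parameter names; it splits plain text lines rather than records) that uses the same nested read/write pattern:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.io.InputStreamCallback;

// Hypothetical helper, not NiFi source: the nested read/write pattern from SplitRecord,
// applied to plain text lines instead of records.
public final class LineSplitter {

    // Streams 'original' once; each child FlowFile receives up to maxLines lines.
    public static List<FlowFile> splitByLines(final ProcessSession session, final FlowFile original, final int maxLines) {
        final List<FlowFile> splits = new ArrayList<>();
        session.read(original, new InputStreamCallback() {
            @Override
            public void process(final InputStream in) throws IOException {
                // 'in' is owned by the session and is closed after this callback returns
                final BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
                String line = reader.readLine();
                while (line != null) {
                    FlowFile split = session.create(original);
                    // session.write(split) opens the child's content for writing while the
                    // parent's content is still being read on the same session
                    try (final OutputStream out = session.write(split);
                        final Writer writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)) {
                        int written = 0;
                        while (line != null && written < maxLines) {
                            writer.write(line);
                            writer.write('\n');
                            written++;
                            line = reader.readLine();
                        }
                    }
                    splits.add(split);
                }
            }
        });
        return splits;
    }

    private LineSplitter() {
    }
}

The caller would then transfer the original and the returned splits to its own relationships, much as SplitRecord does with REL_ORIGINAL and REL_SPLITS above.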

Example 18 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in the Apache NiFi project.

From the class SplitText, method onTrigger:

/**
 * Splits the incoming stream, releasing all splits as FlowFiles at once.
 */
@Override
public void onTrigger(ProcessContext context, ProcessSession processSession) throws ProcessException {
    FlowFile sourceFlowFile = processSession.get();
    if (sourceFlowFile == null) {
        return;
    }
    AtomicBoolean error = new AtomicBoolean();
    List<SplitInfo> computedSplitsInfo = new ArrayList<>();
    AtomicReference<SplitInfo> headerSplitInfoRef = new AtomicReference<>();
    processSession.read(sourceFlowFile, new InputStreamCallback() {

        @Override
        public void process(InputStream in) throws IOException {
            TextLineDemarcator demarcator = new TextLineDemarcator(in);
            SplitInfo splitInfo = null;
            long startOffset = 0;
            // Compute fragment representing the header (if available)
            long start = System.nanoTime();
            try {
                if (SplitText.this.headerLineCount > 0) {
                    splitInfo = SplitText.this.computeHeader(demarcator, startOffset, SplitText.this.headerLineCount, null, null);
                    if ((splitInfo != null) && (splitInfo.lineCount < SplitText.this.headerLineCount)) {
                        error.set(true);
                        getLogger().error("Unable to split " + sourceFlowFile + " due to insufficient amount of header lines. Required " + SplitText.this.headerLineCount + " but was " + splitInfo.lineCount + ". Routing to failure.");
                    }
                } else if (SplitText.this.headerMarker != null) {
                    splitInfo = SplitText.this.computeHeader(demarcator, startOffset, Long.MAX_VALUE, SplitText.this.headerMarker.getBytes(StandardCharsets.UTF_8), null);
                }
                headerSplitInfoRef.set(splitInfo);
            } catch (IllegalStateException e) {
                error.set(true);
                getLogger().error(e.getMessage() + " Routing to failure.", e);
            }
            // Compute and collect fragments representing the individual splits
            if (!error.get()) {
                if (headerSplitInfoRef.get() != null) {
                    startOffset = headerSplitInfoRef.get().length;
                }
                long preAccumulatedLength = startOffset;
                while ((splitInfo = SplitText.this.nextSplit(demarcator, startOffset, SplitText.this.lineCount, splitInfo, preAccumulatedLength)) != null) {
                    computedSplitsInfo.add(splitInfo);
                    startOffset += splitInfo.length;
                }
                long stop = System.nanoTime();
                if (getLogger().isDebugEnabled()) {
                    getLogger().debug("Computed splits in " + (stop - start) + " milliseconds.");
                }
            }
        }
    });
    if (error.get()) {
        processSession.transfer(sourceFlowFile, REL_FAILURE);
    } else {
        final String fragmentId = UUID.randomUUID().toString();
        List<FlowFile> splitFlowFiles = this.generateSplitFlowFiles(fragmentId, sourceFlowFile, headerSplitInfoRef.get(), computedSplitsInfo, processSession);
        final FlowFile originalFlowFile = FragmentAttributes.copyAttributesToOriginal(processSession, sourceFlowFile, fragmentId, splitFlowFiles.size());
        processSession.transfer(originalFlowFile, REL_ORIGINAL);
        if (!splitFlowFiles.isEmpty()) {
            processSession.transfer(splitFlowFiles, REL_SPLITS);
        }
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), InputStream (java.io.InputStream), TextLineDemarcator (org.apache.nifi.stream.io.util.TextLineDemarcator), ArrayList (java.util.ArrayList), AtomicReference (java.util.concurrent.atomic.AtomicReference), IOException (java.io.IOException), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback)
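
SplitText shows a common idiom: the callback cannot return a value, so results and an error flag are carried out of the anonymous InputStreamCallback via AtomicReference and AtomicBoolean and inspected after session.read() returns. A minimal sketch of the same idiom follows (hypothetical helper with invented names; it only counts lines instead of computing split offsets):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.io.InputStreamCallback;

// Hypothetical helper, not NiFi source: the SplitText idiom of carrying results and an
// error flag out of the anonymous callback via atomics, reduced to a line count.
public final class LineCounter {

    public static long countLines(final ProcessSession session, final FlowFile flowFile, final AtomicBoolean error) {
        final AtomicLong lineCount = new AtomicLong();
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(final InputStream in) throws IOException {
                try (final BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
                    while (reader.readLine() != null) {
                        lineCount.incrementAndGet();
                    }
                }
                // The callback cannot return a value, so the error flag is set here
                // and checked by the caller after session.read() returns
                if (lineCount.get() == 0) {
                    error.set(true);
                }
            }
        });
        return lineCount.get();
    }

    private LineCounter() {
    }
}

The caller then routes to a failure relationship when error.get() is true, exactly as SplitText does with REL_FAILURE above.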

Example 19 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in the Apache NiFi project.

From the class IdentifyMimeType, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    final AtomicReference<String> mimeTypeRef = new AtomicReference<>(null);
    final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream stream) throws IOException {
            try (final InputStream in = new BufferedInputStream(stream)) {
                TikaInputStream tikaStream = TikaInputStream.get(in);
                Metadata metadata = new Metadata();
                if (filename != null && context.getProperty(USE_FILENAME_IN_DETECTION).asBoolean()) {
                    metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename);
                }
                // Get mime type
                MediaType mediatype = detector.detect(tikaStream, metadata);
                mimeTypeRef.set(mediatype.toString());
            }
        }
    });
    String mimeType = mimeTypeRef.get();
    String extension = "";
    try {
        MimeType mimetype;
        mimetype = config.getMimeRepository().forName(mimeType);
        extension = mimetype.getExtension();
    } catch (MimeTypeException ex) {
        logger.warn("MIME type extension lookup failed: {}", new Object[] { ex });
    }
    // Workaround for bug in Tika - https://issues.apache.org/jira/browse/TIKA-1563
    if (mimeType != null && mimeType.equals("application/gzip") && extension.equals(".tgz")) {
        extension = ".gz";
    }
    if (mimeType == null) {
        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/octet-stream");
        flowFile = session.putAttribute(flowFile, "mime.extension", "");
        logger.info("Unable to identify MIME Type for {}; setting to application/octet-stream", new Object[] { flowFile });
    } else {
        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), mimeType);
        flowFile = session.putAttribute(flowFile, "mime.extension", extension);
        logger.info("Identified {} as having MIME Type {}", new Object[] { flowFile, mimeType });
    }
    session.getProvenanceReporter().modifyAttributes(flowFile);
    session.transfer(flowFile, REL_SUCCESS);
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), BufferedInputStream (java.io.BufferedInputStream), TikaInputStream (org.apache.tika.io.TikaInputStream), InputStream (java.io.InputStream), Metadata (org.apache.tika.metadata.Metadata), AtomicReference (java.util.concurrent.atomic.AtomicReference), IOException (java.io.IOException), ComponentLog (org.apache.nifi.logging.ComponentLog), MimeType (org.apache.tika.mime.MimeType), MimeTypeException (org.apache.tika.mime.MimeTypeException), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), MediaType (org.apache.tika.mime.MediaType)
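
Because all results surface as FlowFile attributes, this processor is convenient to exercise with the nifi-mock TestRunner. The sketch below is an illustrative test (the class name and sample payload are invented; the exact MIME type Tika reports can vary, so only the presence of the attributes is asserted):

import java.nio.charset.StandardCharsets;
import java.util.Collections;

import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.processors.standard.IdentifyMimeType;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Test;

// Illustrative test sketch using nifi-mock; relies only on the REL_SUCCESS relationship
// and the mime.type / mime.extension attributes shown in the example above.
public class IdentifyMimeTypeSketchTest {

    @Test
    public void testDetectionSetsMimeAttributes() {
        final TestRunner runner = TestRunners.newTestRunner(new IdentifyMimeType());
        // Enqueue a small JSON payload with a filename attribute to aid detection
        runner.enqueue("{\"hello\":\"world\"}".getBytes(StandardCharsets.UTF_8),
                Collections.singletonMap(CoreAttributes.FILENAME.key(), "sample.json"));
        runner.run();

        runner.assertAllFlowFilesTransferred(IdentifyMimeType.REL_SUCCESS, 1);
        final MockFlowFile out = runner.getFlowFilesForRelationship(IdentifyMimeType.REL_SUCCESS).get(0);
        // Both attributes are set on every path through onTrigger
        out.assertAttributeExists(CoreAttributes.MIME_TYPE.key());
        out.assertAttributeExists("mime.extension");
    }
}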

Example 20 with InputStreamCallback

Use of org.apache.nifi.processor.io.InputStreamCallback in the Apache NiFi project.

From the class ParseCEF, method onTrigger:

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final CEFParser parser = new CEFParser(validator);
    final byte[] buffer = new byte[(int) flowFile.getSize()];
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, buffer);
        }
    });
    CommonEvent event;
    try {
        // parcefoneLocale defaults to en_US, so this should not fail. But we force failure in case the custom
        // validator failed to identify an invalid Locale
        final Locale parcefoneLocale = Locale.forLanguageTag(context.getProperty(DATETIME_REPRESENTATION).getValue());
        event = parser.parse(buffer, true, parcefoneLocale);
    } catch (Exception e) {
        // This should never trigger, but it is kept as a fencing mechanism
        // against possible ParCEFone bugs.
        getLogger().error("Parser returned unexpected Exception {} while processing {}; routing to failure", new Object[] { e, flowFile });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    // The parser returns null when it cannot parse an event, so we test for that here
    if (event == null) {
        getLogger().error("Failed to parse {} as a CEF message: it does not conform to the CEF standard; routing to failure", new Object[] { flowFile });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    try {
        final String destination = context.getProperty(FIELDS_DESTINATION).getValue();
        switch(destination) {
            case DESTINATION_ATTRIBUTES:
                final Map<String, String> attributes = new HashMap<>();
                // Process KVs of the Header field
                for (Map.Entry<String, Object> entry : event.getHeader().entrySet()) {
                    attributes.put("cef.header." + entry.getKey(), prettyResult(entry.getValue(), tzId));
                }
                // Process KVs composing the Extension field
                for (Map.Entry<String, Object> entry : event.getExtension(true).entrySet()) {
                    attributes.put("cef.extension." + entry.getKey(), prettyResult(entry.getValue(), tzId));
                }
                // Apply the collected attributes to the FlowFile once, after the loop
                flowFile = session.putAllAttributes(flowFile, attributes);
                break;
            case DESTINATION_CONTENT:
                ObjectNode results = mapper.createObjectNode();
                // Add two JSON objects containing one CEF field each
                results.set("header", mapper.valueToTree(event.getHeader()));
                results.set("extension", mapper.valueToTree(event.getExtension(true)));
                // Optionally append the raw CEF message to the resulting JSON
                if (context.getProperty(APPEND_RAW_MESSAGE_TO_JSON).asBoolean()) {
                    results.set("_raw", mapper.valueToTree(new String(buffer)));
                }
                flowFile = session.write(flowFile, new OutputStreamCallback() {

                    @Override
                    public void process(OutputStream out) throws IOException {
                        try (OutputStream outputStream = new BufferedOutputStream(out)) {
                            outputStream.write(mapper.writeValueAsBytes(results));
                        }
                    }
                });
                // Adjust the FlowFile mime.type attribute
                flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
                // Update the provenance for good measure
                session.getProvenanceReporter().modifyContent(flowFile, "Replaced content with parsed CEF fields and values");
                break;
        }
        // Whatever the parsing strategy, the FlowFile is ready to transfer to success and commit
        session.transfer(flowFile, REL_SUCCESS);
        session.commit();
    } catch (CEFHandlingException e) {
        // The flowfile has failed parsing & validation, routing to failure and committing
        getLogger().error("Failed to parse {} as a CEF message due to {}; routing to failure", new Object[] { flowFile, e });
        // Create a provenance event recording the routing to failure
        session.getProvenanceReporter().route(flowFile, REL_FAILURE);
        session.transfer(flowFile, REL_FAILURE);
        session.commit();
        return;
    } finally {
        session.rollback();
    }
}
Also used: Locale (java.util.Locale), FlowFile (org.apache.nifi.flowfile.FlowFile), ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode), HashMap (java.util.HashMap), InputStream (java.io.InputStream), BufferedOutputStream (org.apache.nifi.stream.io.BufferedOutputStream), OutputStream (java.io.OutputStream), IOException (java.io.IOException), CommonEvent (com.fluenda.parcefone.event.CommonEvent), CEFHandlingException (com.fluenda.parcefone.event.CEFHandlingException), ProcessException (org.apache.nifi.processor.exception.ProcessException), JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback), CEFParser (com.fluenda.parcefone.parser.CEFParser), Map (java.util.Map)
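
As with InputStreamCallback, OutputStreamCallback declares a single abstract method, so the JSON write in the DESTINATION_CONTENT branch can be expressed as a lambda on Java 8+. A minimal sketch follows (hypothetical helper class and method names; mapper and results stand for the ObjectMapper and ObjectNode used above):

import java.io.IOException;
import java.io.OutputStream;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.stream.io.BufferedOutputStream;

// Hypothetical helper, not NiFi source: the JSON write from the DESTINATION_CONTENT
// branch, with the anonymous OutputStreamCallback replaced by a lambda.
public final class JsonContentWriter {

    public static FlowFile writeJson(final ProcessSession session, final FlowFile flowFile,
                                     final ObjectMapper mapper, final ObjectNode results) {
        return session.write(flowFile, out -> {
            try (final OutputStream outputStream = new BufferedOutputStream(out)) {
                outputStream.write(mapper.writeValueAsBytes(results));
            }
        });
    }

    private JsonContentWriter() {
    }
}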

Aggregations

IOException (java.io.IOException): 80
InputStream (java.io.InputStream): 80
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback): 80
FlowFile (org.apache.nifi.flowfile.FlowFile): 62
ProcessException (org.apache.nifi.processor.exception.ProcessException): 35
ComponentLog (org.apache.nifi.logging.ComponentLog): 27
HashMap (java.util.HashMap): 25
AtomicReference (java.util.concurrent.atomic.AtomicReference): 23
OutputStream (java.io.OutputStream): 19
BufferedInputStream (java.io.BufferedInputStream): 18
ArrayList (java.util.ArrayList): 17
Map (java.util.Map): 17
OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback): 13
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 11
BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream): 10
StopWatch (org.apache.nifi.util.StopWatch): 10
HashSet (java.util.HashSet): 9
Charset (java.nio.charset.Charset): 8
FileInputStream (java.io.FileInputStream): 7
ProcessSession (org.apache.nifi.processor.ProcessSession): 7