Search in sources:

Example 56 with InputStreamCallback

use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

the class YandexTranslate method onTrigger.

/**
 * Translates text for a single FlowFile by calling the Yandex translation service.
 *
 * <p>The evaluated value of every dynamic property is submitted for translation, and each
 * translated value is written back as a FlowFile attribute named after that property. When
 * TRANSLATE_CONTENT is enabled, the FlowFile content is decoded with the configured character
 * set, submitted as the last text, and replaced by its translation; the {@code language}
 * attribute is then set to the target language.</p>
 *
 * <p>Routing:</p>
 * <ul>
 *   <li>REL_COMMS_FAILURE - invoking the request threw an exception.</li>
 *   <li>REL_TRANSLATION_FAILED - the response status was not 200 OK; the reason phrase is stored
 *       in the {@code yandex.translate.failure.reason} attribute.</li>
 *   <li>REL_SUCCESS - the translations were applied.</li>
 * </ul>
 *
 * @param context provides the processor's property values
 * @param session used to read, update and route the FlowFile
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final StopWatch stopWatch = new StopWatch(true);
    final String key = context.getProperty(KEY).getValue();
    final String sourceLanguage = context.getProperty(SOURCE_LANGUAGE).evaluateAttributeExpressions(flowFile).getValue();
    final String targetLanguage = context.getProperty(TARGET_LANGUAGE).evaluateAttributeExpressions(flowFile).getValue();
    final String encoding = context.getProperty(CHARACTER_SET).evaluateAttributeExpressions(flowFile).getValue();

    // attributeNames and textValues are filled in lock-step, so index i of the translated texts
    // corresponds to attributeNames.get(i).
    final List<String> attributeNames = new ArrayList<>();
    final List<String> textValues = new ArrayList<>();
    for (final PropertyDescriptor descriptor : context.getProperties().keySet()) {
        if (descriptor.isDynamic()) {
            // add to list so that we know the order when the translations come back.
            attributeNames.add(descriptor.getName());
            textValues.add(context.getProperty(descriptor).evaluateAttributeExpressions(flowFile).getValue());
        }
    }

    if (context.getProperty(TRANSLATE_CONTENT).asBoolean()) {
        // The content is appended after all attribute texts, so it is always the last entry.
        final byte[] buff = new byte[(int) flowFile.getSize()];
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(final InputStream in) throws IOException {
                StreamUtils.fillBuffer(in, buff);
            }
        });
        final String content = new String(buff, Charset.forName(encoding));
        textValues.add(content);
    }

    final Invocation invocation = prepareResource(key, textValues, sourceLanguage, targetLanguage);
    final Response response;
    try {
        response = invocation.invoke();
    } catch (final Exception e) {
        getLogger().error("Failed to make request to Yandex to translate text for {} due to {}; routing to comms.failure", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_COMMS_FAILURE);
        return;
    }

    final int statusCode = response.getStatus();
    if (statusCode != Response.Status.OK.getStatusCode()) {
        final String reason = response.getStatusInfo().getReasonPhrase();
        // The entity is never read on this path, so close the response explicitly to release
        // the resources associated with it.
        response.close();
        getLogger().error("Failed to translate text using Yandex for {}; response was {}: {}; routing to {}", new Object[] { flowFile, statusCode, reason, REL_TRANSLATION_FAILED.getName() });
        flowFile = session.putAttribute(flowFile, "yandex.translate.failure.reason", reason);
        session.transfer(flowFile, REL_TRANSLATION_FAILED);
        return;
    }

    final Map<String, String> newAttributes = new HashMap<>();
    final Translation translation = response.readEntity(Translation.class);
    final List<String> texts = translation.getText();
    for (int i = 0; i < texts.size(); i++) {
        final String text = texts.get(i);
        if (i < attributeNames.size()) {
            // Translation of a dynamic property: store it under the property's name.
            final String attributeName = attributeNames.get(i);
            newAttributes.put(attributeName, text);
        } else {
            // Any text beyond the attribute translations is the translated content.
            flowFile = session.write(flowFile, new OutputStreamCallback() {

                @Override
                public void process(final OutputStream out) throws IOException {
                    out.write(text.getBytes(encoding));
                }
            });
            newAttributes.put("language", targetLanguage);
        }
    }
    if (!newAttributes.isEmpty()) {
        flowFile = session.putAllAttributes(flowFile, newAttributes);
    }
    stopWatch.stop();
    session.transfer(flowFile, REL_SUCCESS);
    getLogger().info("Successfully translated {} items for {} from {} to {} in {}; routing to success", new Object[] { texts.size(), flowFile, sourceLanguage, targetLanguage, stopWatch.getDuration() });
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) Translation(org.apache.nifi.processors.yandex.model.Translation) PropertyDescriptor(org.apache.nifi.components.PropertyDescriptor) Invocation(javax.ws.rs.client.Invocation) HashMap(java.util.HashMap) MultivaluedHashMap(javax.ws.rs.core.MultivaluedHashMap) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) IOException(java.io.IOException) ProcessException(org.apache.nifi.processor.exception.ProcessException) IOException(java.io.IOException) StopWatch(org.apache.nifi.util.StopWatch) Response(javax.ws.rs.core.Response) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback)

Example 57 with InputStreamCallback

use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

the class ExtractHL7Attributes method onTrigger.

/**
 * Parses the FlowFile content as an HL7 message and copies the extracted values onto the
 * FlowFile as attributes.
 *
 * <p>The whole content is buffered in memory, decoded with the configured character set and
 * handed to a HAPI pipe parser. On success the FlowFile is routed to REL_SUCCESS; if parsing
 * raises an {@code HL7Exception} it is routed to REL_FAILURE.</p>
 *
 * @param context provides the processor's property values
 * @param session used to read, update and route the FlowFile
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    // Resolve the configuration up front.
    final String charsetName = context.getProperty(CHARACTER_SET).evaluateAttributeExpressions(flowFile).getValue();
    final Charset charset = Charset.forName(charsetName);
    final Boolean useSegmentNames = context.getProperty(USE_SEGMENT_NAMES).asBoolean();
    final Boolean parseSegmentFields = context.getProperty(PARSE_SEGMENT_FIELDS).asBoolean();
    final Boolean skipValidation = context.getProperty(SKIP_VALIDATION).asBoolean();
    final String inputVersion = context.getProperty(HL7_INPUT_VERSION).getValue();

    // Pull the complete content into memory.
    final byte[] contentBytes = new byte[(int) flowFile.getSize()];
    final InputStreamCallback contentReader = new InputStreamCallback() {

        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, contentBytes);
        }
    };
    session.read(flowFile, contentReader);

    // Configure the HAPI context: a fixed model version unless "autodetect", optionally no validation.
    @SuppressWarnings("resource") final HapiContext hapiContext = new DefaultHapiContext();
    final boolean autodetectVersion = inputVersion.equals("autodetect");
    if (!autodetectVersion) {
        hapiContext.setModelClassFactory(new CanonicalModelClassFactory(inputVersion));
    }
    if (skipValidation) {
        hapiContext.setValidationContext((ValidationContext) ValidationContextFactory.noValidation());
    }
    final PipeParser parser = hapiContext.getPipeParser();
    final String hl7Text = new String(contentBytes, charset);

    final Map<String, String> extracted;
    try {
        final Message message = parser.parse(hl7Text);
        extracted = getAttributes(message, useSegmentNames, parseSegmentFields);
        flowFile = session.putAllAttributes(flowFile, extracted);
        getLogger().debug("Added the following attributes for {}: {}", new Object[] { flowFile, extracted });
    } catch (final HL7Exception e) {
        getLogger().error("Failed to extract attributes from {} due to {}", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    session.transfer(flowFile, REL_SUCCESS);
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) PipeParser(ca.uhn.hl7v2.parser.PipeParser) Message(ca.uhn.hl7v2.model.Message) InputStream(java.io.InputStream) Charset(java.nio.charset.Charset) IOException(java.io.IOException) CanonicalModelClassFactory(ca.uhn.hl7v2.parser.CanonicalModelClassFactory) DefaultHapiContext(ca.uhn.hl7v2.DefaultHapiContext) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) HL7Exception(ca.uhn.hl7v2.HL7Exception) DefaultHapiContext(ca.uhn.hl7v2.DefaultHapiContext) HapiContext(ca.uhn.hl7v2.HapiContext)

Example 58 with InputStreamCallback

use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

the class AbstractHTMLProcessor method parseHTMLDocumentFromFlowfile.

/**
 * Reads the content of the given FlowFile and parses it into a Jsoup document.
 *
 * <p>The base URL is obtained from {@code getBaseUrl} while the content stream is open; a null
 * or empty base URL causes a {@link RuntimeException} to be thrown from inside the read
 * callback. The character set name is taken from the HTML_CHARSET property.</p>
 *
 * @param inputFlowFile Input FlowFile containing the HTML
 * @param context ProcessContext
 * @param session ProcessSession
 *
 * @return Jsoup Document
 */
protected Document parseHTMLDocumentFromFlowfile(final FlowFile inputFlowFile, final ProcessContext context, final ProcessSession session) {
    final AtomicReference<Document> parsedDocument = new AtomicReference<>();

    final InputStreamCallback htmlReader = new InputStreamCallback() {

        @Override
        public void process(InputStream inputStream) throws IOException {
            final String baseUrl = getBaseUrl(inputFlowFile, context);
            final boolean baseUrlMissing = baseUrl == null || baseUrl.isEmpty();
            if (baseUrlMissing) {
                throw new RuntimeException("Base URL was empty.");
            }
            final String charsetName = context.getProperty(HTML_CHARSET).getValue();
            final Document parsed = Jsoup.parse(inputStream, charsetName, baseUrl);
            parsedDocument.set(parsed);
        }
    };
    session.read(inputFlowFile, htmlReader);

    return parsedDocument.get();
}
Also used : InputStream(java.io.InputStream) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) Document(org.jsoup.nodes.Document)

Example 59 with InputStreamCallback

use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

the class InferAvroSchema method inferAvroSchemaFromCSV.

/**
 * Infers the Avro schema from the input Flowfile content. To infer an Avro schema for CSV content a header line is
 * required. You can configure the processor to pull that header line from the first line of the CSV data if it is
 * present OR you can manually supply the desired header line as a property value.
 *
 * <p>When the header is taken from the content, the first line is decoded with the character set configured in
 * the CHARSET property (falling back to the platform default only when that property has no value), so that it
 * is read with the same character set name that is passed on to the CSV properties.</p>
 *
 * @param inputFlowFile
 *  The original input FlowFile containing the CSV content as it entered this processor.
 *
 * @param context
 *  ProcessContext to pull processor configurations.
 *
 * @param session
 *  ProcessSession to transfer FlowFiles
 *
 * @return
 *  The inferred schema in its string form, pretty-printed when PRETTY_AVRO_OUTPUT is enabled.
 */
private String inferAvroSchemaFromCSV(final FlowFile inputFlowFile, final ProcessContext context, final ProcessSession session) {
    final String charsetName = context.getProperty(CHARSET).evaluateAttributeExpressions(inputFlowFile).getValue();

    // Determines the header line either from the property input or the first line of the delimited file.
    final AtomicReference<String> header = new AtomicReference<>();
    // Boolean.TRUE.equals(..) instead of ==, so the check does not rely on boxed-Boolean reference identity.
    final boolean headerFromInput = Boolean.TRUE.equals(context.getProperty(GET_CSV_HEADER_DEFINITION_FROM_INPUT).asBoolean());
    if (headerFromInput) {
        // Read the first line of the file to get the header value.
        session.read(inputFlowFile, new InputStreamCallback() {

            @Override
            public void process(InputStream in) throws IOException {
                final InputStreamReader decoder = charsetName == null
                        ? new InputStreamReader(in)
                        : new InputStreamReader(in, charsetName);
                // try-with-resources closes the reader even if readLine() fails.
                try (BufferedReader br = new BufferedReader(decoder)) {
                    header.set(br.readLine());
                }
            }
        });
    } else {
        header.set(context.getProperty(CSV_HEADER_DEFINITION).evaluateAttributeExpressions(inputFlowFile).getValue());
    }

    // Prepares the CSVProperties for kite
    final CSVProperties props = new CSVProperties.Builder()
            .charset(charsetName)
            .delimiter(context.getProperty(DELIMITER).evaluateAttributeExpressions(inputFlowFile).getValue())
            .quote(context.getProperty(QUOTE_STRING).evaluateAttributeExpressions(inputFlowFile).getValue())
            .escape(context.getProperty(ESCAPE_STRING).evaluateAttributeExpressions(inputFlowFile).getValue())
            .linesToSkip(context.getProperty(HEADER_LINE_SKIP_COUNT).evaluateAttributeExpressions(inputFlowFile).asInteger())
            .header(header.get())
            .hasHeader(headerFromInput)
            .build();

    // Second pass over the content: let kite infer the schema using the properties built above.
    final AtomicReference<String> avroSchema = new AtomicReference<>();
    session.read(inputFlowFile, new InputStreamCallback() {

        @Override
        public void process(InputStream in) throws IOException {
            avroSchema.set(CSVUtil.inferSchema(context.getProperty(RECORD_NAME).evaluateAttributeExpressions(inputFlowFile).getValue(), in, props).toString(context.getProperty(PRETTY_AVRO_OUTPUT).asBoolean()));
        }
    });
    return avroSchema.get();
}
Also used : InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) CSVProperties(org.kitesdk.data.spi.filesystem.CSVProperties) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) BufferedReader(java.io.BufferedReader)

Example 60 with InputStreamCallback

use of org.apache.nifi.processor.io.InputStreamCallback in project nifi by apache.

the class InferAvroSchema method inferAvroSchemaFromJSON.

/**
 * Infers an Avro schema from the JSON content of the input FlowFile.
 *
 * <p>The record name comes from RECORD_NAME and the number of records to analyze from
 * NUM_RECORDS_TO_ANALYZE, both evaluated against the FlowFile's attributes.</p>
 *
 * @param inputFlowFile
 *  The original input FlowFile containing the JSON content as it entered this processor.
 *
 * @param context
 *  ProcessContext to pull processor configurations.
 *
 * @param session
 *  ProcessSession used to read the FlowFile content.
 *
 * @return
 *  The inferred schema in its string form, pretty-printed when PRETTY_AVRO_OUTPUT is enabled.
 */
private String inferAvroSchemaFromJSON(final FlowFile inputFlowFile, final ProcessContext context, final ProcessSession session) {
    final AtomicReference<String> schemaText = new AtomicReference<>();

    final InputStreamCallback schemaInferrer = new InputStreamCallback() {

        @Override
        public void process(InputStream in) throws IOException {
            final String recordName = context.getProperty(RECORD_NAME).evaluateAttributeExpressions(inputFlowFile).getValue();
            final Integer recordsToAnalyze = context.getProperty(NUM_RECORDS_TO_ANALYZE).evaluateAttributeExpressions(inputFlowFile).asInteger();
            final Schema inferred = JsonUtil.inferSchema(in, recordName, recordsToAnalyze);
            final Boolean pretty = context.getProperty(PRETTY_AVRO_OUTPUT).asBoolean();
            schemaText.set(inferred.toString(pretty));
        }
    };
    session.read(inputFlowFile, schemaInferrer);

    return schemaText.get();
}
Also used : InputStream(java.io.InputStream) Schema(org.apache.avro.Schema) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException)

Aggregations

IOException (java.io.IOException)80 InputStream (java.io.InputStream)80 InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback)80 FlowFile (org.apache.nifi.flowfile.FlowFile)62 ProcessException (org.apache.nifi.processor.exception.ProcessException)35 ComponentLog (org.apache.nifi.logging.ComponentLog)27 HashMap (java.util.HashMap)25 AtomicReference (java.util.concurrent.atomic.AtomicReference)23 OutputStream (java.io.OutputStream)19 BufferedInputStream (java.io.BufferedInputStream)18 ArrayList (java.util.ArrayList)17 Map (java.util.Map)17 OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback)13 ByteArrayOutputStream (java.io.ByteArrayOutputStream)11 BufferedInputStream (org.apache.nifi.stream.io.BufferedInputStream)10 StopWatch (org.apache.nifi.util.StopWatch)10 HashSet (java.util.HashSet)9 Charset (java.nio.charset.Charset)8 FileInputStream (java.io.FileInputStream)7 ProcessSession (org.apache.nifi.processor.ProcessSession)7