Search in sources :

Example 1 with NLKBufferedReader

Use of org.apache.nifi.processors.standard.util.NLKBufferedReader in the Apache NiFi project.

Class RouteText, method onTrigger.

/**
 * Reads the incoming FlowFile line by line, evaluates every line against the configured
 * properties, and appends each line to an output FlowFile chosen by the configured route
 * strategy (and by the line's group).
 *
 * <p>When the session has no FlowFile available the method returns without doing anything.
 * Otherwise every output FlowFile collected while reading is tagged with the route and
 * group attributes and transferred to its relationship, and finally the original FlowFile
 * is transferred to {@code REL_ORIGINAL}.
 *
 * @param context supplies the configured property values (character set, trimming,
 *                route strategy, match strategy, case sensitivity)
 * @param session used to obtain the FlowFile, read its content, and create/transfer the outputs
 */
@Override
@SuppressWarnings({ "unchecked", "rawtypes" })
public void onTrigger(final ProcessContext context, final ProcessSession session) {
    final FlowFile originalFlowFile = session.get();
    if (originalFlowFile == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    final Charset charset = Charset.forName(context.getProperty(CHARACTER_SET).getValue());
    final boolean trim = context.getProperty(TRIM_WHITESPACE).asBoolean();
    final String routeStrategy = context.getProperty(ROUTE_STRATEGY).getValue();
    final String matchStrategy = context.getProperty(MATCH_STRATEGY).getValue();
    final boolean ignoreCase = context.getProperty(IGNORE_CASE).asBoolean();
    final boolean compileRegex = matchStrategy.equals(matchesRegularExpressionValue) || matchStrategy.equals(containsRegularExpressionValue);
    final boolean usePropValue = matchStrategy.equals(satisfiesExpression);
    // The route strategy does not change while the content is read, so resolve it once
    // here instead of re-comparing strings for every line and every property.
    final boolean routeToMatchingProperty = ROUTE_TO_MATCHING_PROPERTY_NAME.getValue().equals(routeStrategy);
    final boolean routeWhenAnyMatches = ROUTE_TO_MATCHED_WHEN_ANY_PROPERTY_MATCHES.getValue().equals(routeStrategy);
    final boolean routeWhenAllMatch = ROUTE_TO_MATCHED_WHEN_ALL_PROPERTIES_MATCH.getValue().equals(routeStrategy);
    // Build up a Map of Relationship to object, where the object is the
    // thing that each line is compared against
    final Map<Relationship, Object> propValueMap;
    final Map<Relationship, PropertyValue> propMap = this.propertyMap;
    if (usePropValue) {
        // If we are using an Expression Language we want a Map where the value is the
        // PropertyValue, so we can just use the 'propMap' - no need to copy it.
        propValueMap = (Map) propMap;
    } else {
        propValueMap = new HashMap<>(propMap.size());
        for (final Map.Entry<Relationship, PropertyValue> entry : propMap.entrySet()) {
            final String value = entry.getValue().evaluateAttributeExpressions(originalFlowFile).getValue();
            propValueMap.put(entry.getKey(), compileRegex ? cachedCompiledPattern(value, ignoreCase) : value);
        }
    }
    // Output FlowFiles collected while reading, keyed by relationship and then by group.
    final Map<Relationship, Map<Group, FlowFile>> flowFileMap = new HashMap<>();
    final Pattern groupPattern = groupingRegex;
    session.read(originalFlowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream in) throws IOException {
            try (final Reader inReader = new InputStreamReader(in, charset);
                final NLKBufferedReader reader = new NLKBufferedReader(inReader)) {
                final Map<String, String> variables = new HashMap<>(2);
                int lineCount = 0;
                String line;
                while ((line = reader.readLine()) != null) {
                    final String matchLine;
                    if (trim) {
                        matchLine = line.trim();
                    } else {
                        // Always trim off the new-line and carriage return characters before evaluating the line.
                        // The NLKBufferedReader maintains these characters so that when we write the line out we can maintain
                        // these characters. However, we don't actually want to match against these characters.
                        //
                        // indexOf returns -1 when the character is absent, so 0 is a valid hit: a blank line
                        // ("\n" or "\r\n") has its terminator at index 0 and must be reduced to "".
                        int endOfContent = line.length();
                        final int indexOfCR = line.indexOf('\r');
                        final int indexOfNL = line.indexOf('\n');
                        if (indexOfCR >= 0) {
                            endOfContent = indexOfCR;
                        }
                        if (indexOfNL >= 0 && indexOfNL < endOfContent) {
                            endOfContent = indexOfNL;
                        }
                        matchLine = line.substring(0, endOfContent);
                    }
                    // Made available to lineMatches as per-line variables.
                    variables.put("line", line);
                    variables.put("lineNo", String.valueOf(++lineCount));
                    int propertiesThatMatchedLine = 0;
                    for (final Map.Entry<Relationship, Object> entry : propValueMap.entrySet()) {
                        final boolean lineMatchesProperty = lineMatches(matchLine, entry.getValue(), matchStrategy, ignoreCase, originalFlowFile, variables);
                        if (lineMatchesProperty) {
                            propertiesThatMatchedLine++;
                        }
                        if (lineMatchesProperty && routeToMatchingProperty) {
                            // route each individual line to each Relationship that matches. This one matches.
                            final Relationship relationship = entry.getKey();
                            final Group group = getGroup(matchLine, groupPattern);
                            appendLine(session, flowFileMap, relationship, originalFlowFile, line, charset, group);
                            continue;
                        }
                        // break as soon as possible to avoid calculating things we don't need to calculate.
                        if (lineMatchesProperty && routeWhenAnyMatches) {
                            break;
                        }
                        if (!lineMatchesProperty && routeWhenAllMatch) {
                            break;
                        }
                    }
                    final Relationship relationship;
                    if (routeToMatchingProperty && propertiesThatMatchedLine > 0) {
                        // Set relationship to null so that we do not append the line to each FlowFile again. #appendLine is called
                        // above within the loop, as the line may need to go to multiple different FlowFiles.
                        relationship = null;
                    } else if (routeWhenAnyMatches && propertiesThatMatchedLine > 0) {
                        relationship = REL_MATCH;
                    } else if (routeWhenAllMatch && propertiesThatMatchedLine == propValueMap.size()) {
                        relationship = REL_MATCH;
                    } else {
                        relationship = REL_NO_MATCH;
                    }
                    if (relationship != null) {
                        final Group group = getGroup(matchLine, groupPattern);
                        appendLine(session, flowFileMap, relationship, originalFlowFile, line, charset, group);
                    }
                }
            }
        }
    });
    // Tag every output FlowFile with its route and group, then transfer it to its relationship.
    for (final Map.Entry<Relationship, Map<Group, FlowFile>> entry : flowFileMap.entrySet()) {
        final Relationship relationship = entry.getKey();
        final Map<Group, FlowFile> groupToFlowFileMap = entry.getValue();
        for (final Map.Entry<Group, FlowFile> flowFileEntry : groupToFlowFileMap.entrySet()) {
            final Group group = flowFileEntry.getKey();
            final FlowFile flowFile = flowFileEntry.getValue();
            final Map<String, String> attributes = new HashMap<>(2);
            attributes.put(ROUTE_ATTRIBUTE_KEY, relationship.getName());
            attributes.put(GROUP_ATTRIBUTE_KEY, StringUtils.join(group.getCapturedValues(), ", "));
            logger.info("Created {} from {}; routing to relationship {}", new Object[] { flowFile, originalFlowFile, relationship.getName() });
            FlowFile updatedFlowFile = session.putAllAttributes(flowFile, attributes);
            session.getProvenanceReporter().route(updatedFlowFile, entry.getKey());
            session.transfer(updatedFlowFile, entry.getKey());
        }
    }
    // now transfer the original flow file
    FlowFile flowFile = originalFlowFile;
    logger.info("Routing {} to {}", new Object[] { flowFile, REL_ORIGINAL });
    session.getProvenanceReporter().route(originalFlowFile, REL_ORIGINAL);
    flowFile = session.putAttribute(flowFile, ROUTE_ATTRIBUTE_KEY, REL_ORIGINAL.getName());
    session.transfer(flowFile, REL_ORIGINAL);
}
Also used : HashMap(java.util.HashMap) NLKBufferedReader(org.apache.nifi.processors.standard.util.NLKBufferedReader) Reader(java.io.Reader) InputStreamReader(java.io.InputStreamReader) NLKBufferedReader(org.apache.nifi.processors.standard.util.NLKBufferedReader) FlowFile(org.apache.nifi.flowfile.FlowFile) Pattern(java.util.regex.Pattern) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) Charset(java.nio.charset.Charset) PropertyValue(org.apache.nifi.components.PropertyValue) IOException(java.io.IOException) ComponentLog(org.apache.nifi.logging.ComponentLog) Relationship(org.apache.nifi.processor.Relationship) DynamicRelationship(org.apache.nifi.annotation.behavior.DynamicRelationship) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) Map(java.util.Map) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap)

Aggregations

IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 InputStreamReader (java.io.InputStreamReader)1 Reader (java.io.Reader)1 Charset (java.nio.charset.Charset)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 ConcurrentMap (java.util.concurrent.ConcurrentMap)1 Pattern (java.util.regex.Pattern)1 DynamicRelationship (org.apache.nifi.annotation.behavior.DynamicRelationship)1 PropertyValue (org.apache.nifi.components.PropertyValue)1 FlowFile (org.apache.nifi.flowfile.FlowFile)1 ComponentLog (org.apache.nifi.logging.ComponentLog)1 Relationship (org.apache.nifi.processor.Relationship)1 InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback)1 NLKBufferedReader (org.apache.nifi.processors.standard.util.NLKBufferedReader)1