Search in sources :

Example 1 with Match

use of io.thekraken.grok.api.Match in project nifi by apache.

the class ExtractGrok method onTrigger.

/**
 * Evaluates the Grok expression against the content of the next FlowFile in the session.
 *
 * <p>The content is read into a byte buffer taken from {@code bufferQueue} (or freshly
 * allocated with the size given by {@code MAX_BUFFER_SIZE}), decoded with the configured
 * {@code CHARACTER_SET}, and matched. When the match yields no captures the FlowFile is
 * routed to {@code REL_NO_MATCH}. Otherwise, depending on {@code DESTINATION}, the captures
 * are either added as attributes prefixed with {@code "grok."} or written as JSON into the
 * FlowFile content, and the FlowFile is routed to {@code REL_MATCH}.
 *
 * @param context provides the processor property values
 * @param session the session the FlowFile is obtained from and transferred through
 * @throws ProcessException declared by the overridden method
 */
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        // Nothing queued for this invocation.
        return;
    }

    final StopWatch stopWatch = new StopWatch(true);
    final Charset charset = Charset.forName(context.getProperty(CHARACTER_SET).getValue());

    // Reuse a pooled buffer when one is available; otherwise allocate one of the configured size.
    byte[] pooledBuffer = bufferQueue.poll();
    if (pooledBuffer == null) {
        pooledBuffer = new byte[context.getProperty(MAX_BUFFER_SIZE).asDataSize(DataUnit.B).intValue()];
    }
    final byte[] readBuffer = pooledBuffer;

    final String contentString;
    try {
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(InputStream in) throws IOException {
                // NOTE(review): the 'false' flag presumably means the stream may end before the buffer is full - confirm.
                StreamUtils.fillBuffer(in, readBuffer, false);
            }
        });
        // Only the bytes that belong to this FlowFile are decoded; the rest of the buffer may be stale.
        final int usableLength = (int) Math.min(readBuffer.length, flowFile.getSize());
        contentString = new String(readBuffer, 0, usableLength, charset);
    } finally {
        // Always hand the buffer back so it can be reused by a later invocation.
        bufferQueue.offer(readBuffer);
    }

    final Match gm = grok.match(contentString);
    gm.captures();
    final Map<String, Object> captures = gm.toMap();

    if (captures.isEmpty()) {
        session.transfer(flowFile, REL_NO_MATCH);
        getLogger().info("Did not match any Grok Expressions for FlowFile {}", new Object[] { flowFile });
        return;
    }

    final ObjectMapper objectMapper = new ObjectMapper();
    final String destination = context.getProperty(DESTINATION).getValue();
    switch (destination) {
        case FLOWFILE_ATTRIBUTE: {
            // Copy every non-null capture into an attribute named "grok.<capture name>".
            final Map<String, String> grokResults = new HashMap<>();
            for (final Map.Entry<String, Object> capture : captures.entrySet()) {
                final Object captured = capture.getValue();
                if (captured != null) {
                    grokResults.put("grok." + capture.getKey(), captured.toString());
                }
            }
            flowFile = session.putAllAttributes(flowFile, grokResults);
            session.getProvenanceReporter().modifyAttributes(flowFile);
            session.transfer(flowFile, REL_MATCH);
            getLogger().info("Matched {} Grok Expressions and added attributes to FlowFile {}", new Object[] { grokResults.size(), flowFile });
            break;
        }
        case FLOWFILE_CONTENT: {
            // Replace the content with the captures serialized as JSON.
            FlowFile jsonFlowFile = session.write(flowFile, new StreamCallback() {

                @Override
                public void process(InputStream in, OutputStream out) throws IOException {
                    out.write(objectMapper.writeValueAsBytes(captures));
                }
            });
            jsonFlowFile = session.putAttribute(jsonFlowFile, CoreAttributes.MIME_TYPE.key(), APPLICATION_JSON);
            session.getProvenanceReporter().modifyContent(jsonFlowFile, "Replaced content with parsed Grok fields and values", stopWatch.getElapsed(TimeUnit.MILLISECONDS));
            session.transfer(jsonFlowFile, REL_MATCH);
            break;
        }
    }
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) HashMap(java.util.HashMap) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) Charset(java.nio.charset.Charset) IOException(java.io.IOException) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) StreamCallback(org.apache.nifi.processor.io.StreamCallback) StopWatch(org.apache.nifi.util.StopWatch) Match(io.thekraken.grok.api.Match) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) HashMap(java.util.HashMap) Map(java.util.Map) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)

Example 2 with Match

use of io.thekraken.grok.api.Match in project nifi by apache.

the class GrokRecordReader method nextRecord.

/**
 * Reads the next record from the underlying reader.
 *
 * <p>Lines are consumed until one produces a non-empty Grok capture map (or a map left over
 * from the previous call in {@code nextMap} is used). Following lines that do not match the
 * pattern are then either read as a stack trace or, when {@code append} is set, attached as
 * trailing text; the first following line that does match is kept in {@code nextMap} /
 * {@code nextLine} for the next call.
 *
 * @param coerceTypes passed through to {@code createRecord}
 * @param dropUnknownFields passed through to {@code createRecord}
 * @return the record built by {@code createRecord}, or {@code null} when the reader has no more lines
 * @throws IOException if reading a line fails
 * @throws MalformedRecordException declared by the overridden method
 */
@Override
public Record nextRecord(final boolean coerceTypes, final boolean dropUnknownFields) throws IOException, MalformedRecordException {
    // Start from the capture map that the previous call may have left behind.
    Map<String, Object> captured = nextMap;
    nextMap = null;

    final StringBuilder rawText = new StringBuilder();
    int linesConsumed = 0;
    while (captured == null || captured.isEmpty()) {
        linesConsumed++;
        final String candidate = (nextLine != null) ? nextLine : reader.readLine();
        // ensure that we don't process nextLine again
        nextLine = null;
        if (candidate == null) {
            // End of input before any line matched.
            return null;
        }
        rawText.append(candidate);

        final Match candidateMatch = grok.match(candidate);
        candidateMatch.captures();
        captured = candidateMatch.toMap();
    }

    // The capture map came from the previous call, so its source line is still held in nextLine.
    if (linesConsumed == 0 && nextLine != null) {
        rawText.append(nextLine);
    }

    // Read the next line to see if it matches the pattern (in which case we will simply leave it for
    // the next call to nextRecord()) or we will attach it to the previously read record.
    String stackTrace = null;
    final StringBuilder trailingText = new StringBuilder();
    while (true) {
        nextLine = reader.readLine();
        if (nextLine == null) {
            break;
        }

        final Match followingMatch = grok.match(nextLine);
        followingMatch.captures();
        final Map<String, Object> followingCaptures = followingMatch.toMap();

        if (!followingCaptures.isEmpty()) {
            // The next line matched our pattern.
            nextMap = followingCaptures;
            break;
        }

        // The next line did not match. If it starts a stack trace, read the stack trace and stop;
        // otherwise, append the next line to the last field in the record.
        if (isStartOfStackTrace(nextLine)) {
            stackTrace = readStackTrace(nextLine);
            rawText.append("\n").append(stackTrace);
            break;
        }
        if (append) {
            trailingText.append("\n").append(nextLine);
            rawText.append("\n").append(nextLine);
        }
    }

    return createRecord(captured, trailingText, stackTrace, rawText.toString(), coerceTypes, dropUnknownFields);
}
Also used : Record(org.apache.nifi.serialization.record.Record) MapRecord(org.apache.nifi.serialization.record.MapRecord) Match(io.thekraken.grok.api.Match)

Example 3 with Match

use of io.thekraken.grok.api.Match in project incubator-gobblin by apache.

the class GrokToJsonConverter method createOutput.

/**
 * Builds the output JSON record for one input record by matching it against the Grok
 * pattern and copying each column listed in the output schema.
 *
 * <p>For every schema entry the captured value is looked up by column name. A missing or
 * null capture becomes a JSON null when the column is nullable and raises an exception
 * otherwise. Present values are converted according to the column's declared type
 * ({@code int}, {@code long}, {@code double}, {@code float}, {@code boolean}); any other
 * type, including {@code string}, is written as a string.
 *
 * @param outputSchema array of column descriptors
 * @param inputRecord the raw text to match
 * @return the converted record
 * @throws DataConversionException if a non-nullable column has no captured value
 */
@VisibleForTesting
JsonObject createOutput(JsonArray outputSchema, String inputRecord) throws DataConversionException {
    Match gm = grok.match(inputRecord);
    gm.captures();
    JsonElement capturesJson = JSON_PARSER.parse(gm.toJson());

    JsonObject outputRecord = new JsonObject();
    for (JsonElement column : outputSchema) {
        JsonObject columnSpec = column.getAsJsonObject();
        String key = columnSpec.get(COLUMN_NAME_KEY).getAsString();
        String type = columnSpec.getAsJsonObject(DATA_TYPE).get(TYPE_KEY).getAsString();

        if (isFieldNull(capturesJson, key)) {
            boolean nullable = columnSpec.get(NULLABLE).getAsBoolean();
            if (!nullable) {
                throw new DataConversionException("Field " + key + " is null or not exists but it is non-nullable by the schema.");
            }
            outputRecord.add(key, JsonNull.INSTANCE);
            continue;
        }

        JsonElement captured = capturesJson.getAsJsonObject().get(key);
        if ("int".equals(type)) {
            outputRecord.addProperty(key, captured.getAsInt());
        } else if ("long".equals(type)) {
            outputRecord.addProperty(key, captured.getAsLong());
        } else if ("double".equals(type)) {
            outputRecord.addProperty(key, captured.getAsDouble());
        } else if ("float".equals(type)) {
            outputRecord.addProperty(key, captured.getAsFloat());
        } else if ("boolean".equals(type)) {
            outputRecord.addProperty(key, captured.getAsBoolean());
        } else {
            // "string" and every unrecognized type are emitted as strings.
            outputRecord.addProperty(key, captured.getAsString());
        }
    }
    return outputRecord;
}
Also used : JsonElement(com.google.gson.JsonElement) JsonObject(com.google.gson.JsonObject) DataConversionException(org.apache.gobblin.converter.DataConversionException) Match(io.thekraken.grok.api.Match) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 4 with Match

use of io.thekraken.grok.api.Match in project LogHub by fbacchella.

the class TestGrok method TestLoadPatterns8.

// Will fail once https://github.com/thekrakken/java-grok/issues/64 is fixed: the final
// assertion expects the value in Grok's capture map to differ from the raw regex group.
@Test
public void TestLoadPatterns8() throws GrokException {
    String pattern = "(?<message>client id): (?<clientid>.*)";
    String input = "client id: \"name\" \"Mac OS X Mail\" \"version\" \"10.2 (3259)\" \"os\" \"Mac OS X\" \"os-version\" \"10.12.3 (16D32)\" \"vendor\" \"Apple Inc.\"";
    String expectedClientId = "\"name\" \"Mac OS X Mail\" \"version\" \"10.2 (3259)\" \"os\" \"Mac OS X\" \"os-version\" \"10.12.3 (16D32)\" \"vendor\" \"Apple Inc.\"";

    // Validate the search is good with the plain JDK regex engine. The match itself is
    // asserted so that a non-matching pattern fails the test instead of silently skipping
    // the check.
    Matcher m = Pattern.compile(pattern).matcher(input);
    Assert.assertTrue(m.matches());
    Assert.assertEquals(expectedClientId, m.group("clientid"));

    // Run the same pattern through Grok and compare its capture map with the underlying match.
    io.thekraken.grok.api.Grok grok = new io.thekraken.grok.api.Grok();
    grok.compile(pattern, false);
    Match gm = grok.match(input);
    gm.captures();
    Assert.assertNotEquals(gm.toMap().get("clientid"), gm.getMatch().group("clientid"));
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) Match(io.thekraken.grok.api.Match) Test(org.junit.Test)

Example 5 with Match

use of io.thekraken.grok.api.Match in project LogHub by fbacchella.

the class Grok method processMessage.

/**
 * Applies the Grok pattern to the string form of one event field and stores the captures
 * back into the event.
 *
 * <p>Captures with a null value, and captures whose name is entirely upper case (other
 * than {@code "."}), are skipped. A capture named {@code "."} is written to the source
 * field itself. List values are reduced to their non-null elements converted to strings:
 * an empty result is skipped, a single element is stored as a plain value, and several
 * elements are stored as a list.
 *
 * @param event the event to read from and write to
 * @param field the name of the field holding the text to match
 * @param destination not used by this implementation
 * @return {@code true} if the field exists and the pattern matched, {@code false} otherwise
 */
@Override
public boolean processMessage(Event event, String field, String destination) {
    if (!event.containsKey(field)) {
        return false;
    }
    String line = event.get(field).toString();
    Match gm = grok.match(line);
    gm.captures();
    if (gm.isNull()) {
        return false;
    }

    // Results from grok needs to be cleaned
    for (Map.Entry<String, Object> capture : gm.toMap().entrySet()) {
        String name = capture.getKey();
        Object value = capture.getValue();
        // . is a special field name, it mean a value to put back in the original field
        boolean targetsSourceField = ".".equals(name);

        // Needed until https://github.com/thekrakken/java-grok/issues/61 is fixed
        if (!targetsSourceField && name.equals(name.toUpperCase())) {
            continue;
        }
        if (value == null) {
            continue;
        }

        String destinationField = targetsSourceField ? field : name;
        if (!(value instanceof List)) {
            event.put(destinationField, value);
            continue;
        }

        // Keep only the non-null elements, as strings.
        List<String> strings = new ArrayList<>();
        for (Object element : (List<?>) value) {
            if (element != null) {
                strings.add(element.toString());
            }
        }
        if (strings.size() == 1) {
            event.put(destinationField, strings.get(0));
        } else if (strings.size() > 1) {
            event.put(destinationField, strings);
        }
    }
    return true;
}
Also used : BufferedInputStream(java.io.BufferedInputStream) PatternSyntaxException(java.util.regex.PatternSyntaxException) GrokException(io.thekraken.grok.api.exception.GrokException) URISyntaxException(java.net.URISyntaxException) Level(org.apache.logging.log4j.Level) Match(io.thekraken.grok.api.Match) IOException(java.io.IOException) InputStreamReader(java.io.InputStreamReader) ArrayList(java.util.ArrayList) List(java.util.List) Helpers(loghub.Helpers) Map(java.util.Map) Collections(java.util.Collections) Event(loghub.Event) InputStream(java.io.InputStream) Properties(loghub.configuration.Properties) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) Map(java.util.Map) Match(io.thekraken.grok.api.Match)

Aggregations

Match (io.thekraken.grok.api.Match)5 IOException (java.io.IOException)2 InputStream (java.io.InputStream)2 Map (java.util.Map)2 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 JsonElement (com.google.gson.JsonElement)1 JsonObject (com.google.gson.JsonObject)1 GrokException (io.thekraken.grok.api.exception.GrokException)1 BufferedInputStream (java.io.BufferedInputStream)1 InputStreamReader (java.io.InputStreamReader)1 OutputStream (java.io.OutputStream)1 URISyntaxException (java.net.URISyntaxException)1 Charset (java.nio.charset.Charset)1 ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Matcher (java.util.regex.Matcher)1 Pattern (java.util.regex.Pattern)1