Search in sources :

Example 1 with Region

use of org.joni.Region in project elasticsearch by elastic.

The method captures of the class Grok.

/**
 * Searches {@code text} with the compiled grok expression and gathers the named
 * capture groups that took part in the match.
 *
 * @param text the input to search; it is encoded as UTF-8 before matching.
 * @return a (possibly empty) map from capture name to captured value, as produced by
 *         {@code GrokMatchGroup}; an empty map when the expression matched but reports
 *         no named groups; {@code null} when the search found no match.
 */
public Map<String, Object> captures(String text) {
    final byte[] utf8 = text.getBytes(StandardCharsets.UTF_8);
    final Map<String, Object> captured = new HashMap<>();
    final Matcher matcher = compiledExpression.matcher(utf8);
    final int matchStart = matcher.search(0, utf8.length, Option.DEFAULT);

    // -1 is the "no match" result; callers receive null in that case.
    if (matchStart == -1) {
        return null;
    }
    // Matched, but there is nothing named to extract.
    if (compiledExpression.numberOfNames() <= 0) {
        return captured;
    }

    final Region region = matcher.getEagerRegion();
    final Iterator<NameEntry> names = compiledExpression.namedBackrefIterator();
    while (names.hasNext()) {
        final NameEntry nameEntry = names.next();
        final int nameLength = nameEntry.nameEnd - nameEntry.nameP;
        final String groupName = new String(nameEntry.name, nameEntry.nameP, nameLength, StandardCharsets.UTF_8);
        for (int number : nameEntry.getBackRefs()) {
            final int begin = region.beg[number];
            if (begin < 0) {
                // A negative start offset means this back-reference did not take part in the match.
                continue;
            }
            final int length = region.end[number] - begin;
            final String matchValue = new String(utf8, begin, length, StandardCharsets.UTF_8);
            final GrokMatchGroup group = new GrokMatchGroup(groupName, matchValue);
            captured.put(group.getName(), group.getValue());
            // Only the first participating back-reference of a name is recorded.
            break;
        }
    }
    return captured;
}
Also used : HashMap(java.util.HashMap) Matcher(org.joni.Matcher) Region(org.joni.Region) NameEntry(org.joni.NameEntry)

Example 2 with Region

use of org.joni.Region in project elasticsearch by elastic.

The method toRegex of the class Grok.

/**
 * Converts a grok expression into a named regex expression.
 *
 * <p>The first grok reference found in {@code grokPattern} is replaced by the pattern
 * registered under its name in the pattern bank, and the method then recurses on the
 * replacement plus the remaining text until no reference is left.
 *
 * @param grokPattern the grok expression to expand
 * @return named regex expression; {@code grokPattern} unchanged when it contains no grok reference
 * @throws IllegalArgumentException if a referenced pattern name has no entry in the pattern bank
 */
public String toRegex(String grokPattern) {
    byte[] grokPatternBytes = grokPattern.getBytes(StandardCharsets.UTF_8);
    Matcher matcher = GROK_PATTERN_REGEX.matcher(grokPatternBytes);
    int result = matcher.search(0, grokPatternBytes.length, Option.NONE);
    if (result != -1) {
        Region region = matcher.getEagerRegion();
        String namedPatternRef = groupMatch(NAME_GROUP, region, grokPattern);
        String subName = groupMatch(SUBNAME_GROUP, region, grokPattern);
        // TODO(tal): Support definitions
        // NOTE: 'definition' is extracted but not used yet; kept for the TODO above.
        String definition = groupMatch(DEFINITION_GROUP, region, grokPattern);
        String patternName = groupMatch(PATTERN_GROUP, region, grokPattern);
        String pattern = patternBank.get(patternName);
        if (pattern == null) {
            // Without this check, "%s" would render the missing pattern as the literal
            // text "null" and silently yield a regex that matches the word "null".
            throw new IllegalArgumentException("Unable to find pattern [" + patternName + "] in Grok's pattern dictionary");
        }
        String grokPart;
        if (namedCaptures && subName != null) {
            // Named captures requested and the reference carries a sub-name: named group.
            grokPart = String.format(Locale.US, "(?<%s>%s)", namedPatternRef, pattern);
        } else if (!namedCaptures) {
            // Group name is the pattern name suffixed with the match offset
            // (presumably to keep generated group names distinct; confirm).
            grokPart = String.format(Locale.US, "(?<%s>%s)", patternName + "_" + result, pattern);
        } else {
            // Named captures requested but no sub-name given: non-capturing group.
            grokPart = String.format(Locale.US, "(?:%s)", pattern);
        }
        // Bytes before the match are emitted verbatim; 'result' is used as the match start offset.
        String start = new String(grokPatternBytes, 0, result, StandardCharsets.UTF_8);
        // Everything after the end of the whole match (region.end[0]) still needs expanding.
        String rest = new String(grokPatternBytes, region.end[0], grokPatternBytes.length - region.end[0], StandardCharsets.UTF_8);
        return start + toRegex(grokPart + rest);
    }
    return grokPattern;
}
Also used : Matcher(org.joni.Matcher) Region(org.joni.Region)

Aggregations

Matcher (org.joni.Matcher): 2, Region (org.joni.Region): 2, HashMap (java.util.HashMap): 1, NameEntry (org.joni.NameEntry): 1