Use of org.joni.Region in project elasticsearch by elastic.
The class Grok, method captures.
/**
 * Searches the given text with the compiled grok expression and collects the named
 * captures that took part in the match.
 *
 * @param text the text to search and extract values from.
 * @return a map from capture name to the value reported by {@code GrokMatchGroup} for it;
 *         an empty map when the text matched but no named group produced a value;
 *         {@code null} when the text did not match at all.
 */
public Map<String, Object> captures(String text) {
    byte[] utf8 = text.getBytes(StandardCharsets.UTF_8);
    Map<String, Object> fields = new HashMap<>();
    Matcher matcher = compiledExpression.matcher(utf8);
    int matchStart = matcher.search(0, utf8.length, Option.DEFAULT);

    // No match: callers distinguish this from "matched, nothing captured" via null.
    if (matchStart == -1) {
        return null;
    }
    // Matched, but the expression declares no named groups: nothing to extract.
    if (compiledExpression.numberOfNames() <= 0) {
        return fields;
    }

    Region region = matcher.getEagerRegion();
    Iterator<NameEntry> names = compiledExpression.namedBackrefIterator();
    while (names.hasNext()) {
        NameEntry nameEntry = names.next();
        int nameLength = nameEntry.nameEnd - nameEntry.nameP;
        String groupName = new String(nameEntry.name, nameEntry.nameP, nameLength, StandardCharsets.UTF_8);
        // A name may map to several group numbers; take the first one that participated.
        for (int backRef : nameEntry.getBackRefs()) {
            int begin = region.beg[backRef];
            if (begin < 0) {
                continue;
            }
            int length = region.end[backRef] - begin;
            String matchValue = new String(utf8, begin, length, StandardCharsets.UTF_8);
            GrokMatchGroup group = new GrokMatchGroup(groupName, matchValue);
            fields.put(group.getName(), group.getValue());
            break;
        }
    }
    return fields;
}
Use of org.joni.Region in project elasticsearch by elastic.
The class Grok, method toRegex.
/**
 * Converts a grok expression into a named regex expression.
 *
 * The first grok reference found in the expression is replaced by the pattern it names
 * in the pattern bank; the replacement together with the remainder of the expression is
 * then converted recursively, so references inside the substituted pattern are expanded
 * as well. An expression without any grok reference is returned unchanged.
 *
 * @param grokPattern the grok expression to convert
 * @return named regex expression
 * @throws IllegalArgumentException if the expression references a pattern name that is
 *         not present in the pattern bank
 */
public String toRegex(String grokPattern) {
    byte[] grokPatternBytes = grokPattern.getBytes(StandardCharsets.UTF_8);
    Matcher matcher = GROK_PATTERN_REGEX.matcher(grokPatternBytes);
    int result = matcher.search(0, grokPatternBytes.length, Option.NONE);
    if (result != -1) {
        Region region = matcher.getEagerRegion();
        String namedPatternRef = groupMatch(NAME_GROUP, region, grokPattern);
        String subName = groupMatch(SUBNAME_GROUP, region, grokPattern);
        // TODO(tal): Support definitions
        String definition = groupMatch(DEFINITION_GROUP, region, grokPattern);
        String patternName = groupMatch(PATTERN_GROUP, region, grokPattern);
        String pattern = patternBank.get(patternName);
        if (pattern == null) {
            // Without this check String.format would embed the literal text "null" in the
            // regex, silently producing an expression that matches the string "null".
            throw new IllegalArgumentException("Unable to find pattern [" + patternName + "] in Grok's pattern dictionary");
        }
        String grokPart;
        if (namedCaptures && subName != null) {
            grokPart = String.format(Locale.US, "(?<%s>%s)", namedPatternRef, pattern);
        } else if (!namedCaptures) {
            // The match offset is appended to the pattern name to build the group name.
            grokPart = String.format(Locale.US, "(?<%s>%s)", patternName + "_" + result, pattern);
        } else {
            grokPart = String.format(Locale.US, "(?:%s)", pattern);
        }
        // result and region.end[0] are used as byte offsets into grokPatternBytes.
        String start = new String(grokPatternBytes, 0, result, StandardCharsets.UTF_8);
        String rest = new String(grokPatternBytes, region.end[0], grokPatternBytes.length - region.end[0], StandardCharsets.UTF_8);
        return start + toRegex(grokPart + rest);
    }
    return grokPattern;
}