use of org.joni.Matcher in project elasticsearch by elastic.
the class Grok method captures.
/**
 * Runs the compiled grok expression against the given text and collects the named
 * captures that took part in the match.
 *
 * @param text the text to match and extract values from.
 * @return a map from capture name to the value reported by {@code GrokMatchGroup}; an empty
 *         map when the expression matched but no named capture produced a value; or
 *         {@code null} when the expression did not match the text at all.
 */
public Map<String, Object> captures(String text) {
    final byte[] utf8 = text.getBytes(StandardCharsets.UTF_8);
    final Map<String, Object> captured = new HashMap<>();
    final Matcher matcher = compiledExpression.matcher(utf8);

    // No match at all is reported to the caller as null rather than an empty map.
    if (matcher.search(0, utf8.length, Option.DEFAULT) == -1) {
        return null;
    }
    // Matched, but the expression declares no named groups: nothing to extract.
    if (compiledExpression.numberOfNames() <= 0) {
        return captured;
    }

    final Region region = matcher.getEagerRegion();
    final Iterator<NameEntry> names = compiledExpression.namedBackrefIterator();
    while (names.hasNext()) {
        final NameEntry named = names.next();
        final String groupName = new String(named.name, named.nameP, named.nameEnd - named.nameP, StandardCharsets.UTF_8);
        // A name may map to several group numbers; only the first one that participated
        // in the match contributes a value.
        for (int ref : named.getBackRefs()) {
            final int begin = region.beg[ref];
            if (begin < 0) {
                continue;
            }
            final String matchValue = new String(utf8, begin, region.end[ref] - begin, StandardCharsets.UTF_8);
            final GrokMatchGroup group = new GrokMatchGroup(groupName, matchValue);
            captured.put(group.getName(), group.getValue());
            break;
        }
    }
    return captured;
}
use of org.joni.Matcher in project phoenix by apache.
the class JONIPattern method replaceAll.
/**
 * Replaces every match of {@code pattern} found in {@code srcBytes[srcOffset, srcOffset + srcLen)}
 * with {@code replaceBytes[replaceOffset, replaceOffset + replaceLen)} and returns the result
 * in a freshly allocated array.
 *
 * <p>Works in two passes: the first records the byte span of every match, the second copies
 * the unmatched stretches and the replacement into an array sized exactly for the result.
 *
 * <p>Zero-length matches (for example from a pattern such as {@code a*}) insert the
 * replacement at the match position and the scan then resumes one character further on,
 * so such patterns terminate instead of matching the same position forever.
 *
 * @param srcBytes      buffer holding the input text
 * @param srcOffset     index of the first input byte
 * @param srcLen        number of input bytes
 * @param replaceBytes  buffer holding the replacement text
 * @param replaceOffset index of the first replacement byte
 * @param replaceLen    number of replacement bytes
 * @return a new array containing the input with all matches replaced
 */
private byte[] replaceAll(byte[] srcBytes, int srcOffset, int srcLen, byte[] replaceBytes, int replaceOffset, int replaceLen) {
    class PairInt {
        final int begin, end;

        PairInt(int begin, int end) {
            this.begin = begin;
            this.end = end;
        }
    }
    int srcRange = srcOffset + srcLen;
    Matcher matcher = pattern.matcher(srcBytes, 0, srcRange);
    List<PairInt> searchResults = new LinkedList<PairInt>();
    int matchedBytes = 0;
    int cur = srcOffset;

    // Pass 1: record every match span.
    while (matcher.search(cur, srcRange, Option.DEFAULT) >= 0) {
        int begin = matcher.getBegin();
        int end = matcher.getEnd();
        searchResults.add(new PairInt(begin, end));
        matchedBytes += end - begin;
        if (end > begin) {
            cur = end;
        } else if (end >= srcRange) {
            // Empty match at the very end of the input: nothing left to scan.
            break;
        } else {
            // Empty match: resuming at the same position would find it again forever, so
            // step over one character. The character width comes from the pattern's
            // encoding; a non-positive answer (malformed input) falls back to one byte.
            int charLen = Math.max(1, pattern.getEncoding().length(srcBytes, end, srcRange));
            cur = Math.min(srcRange, end + charLen);
        }
    }

    // Every matched byte is dropped and every match contributes one replacement.
    int totalBytesNeeded = srcLen - matchedBytes + searchResults.size() * replaceLen;
    byte[] ret = new byte[totalBytesNeeded];

    // Pass 2: interleave unmatched input with the replacement.
    int curPosInSrc = srcOffset, curPosInRet = 0;
    for (PairInt pair : searchResults) {
        System.arraycopy(srcBytes, curPosInSrc, ret, curPosInRet, pair.begin - curPosInSrc);
        curPosInRet += pair.begin - curPosInSrc;
        System.arraycopy(replaceBytes, replaceOffset, ret, curPosInRet, replaceLen);
        curPosInRet += replaceLen;
        curPosInSrc = pair.end;
    }
    // Tail after the last match (or the whole input when nothing matched).
    System.arraycopy(srcBytes, curPosInSrc, ret, curPosInRet, srcRange - curPosInSrc);
    return ret;
}
use of org.joni.Matcher in project elasticsearch by elastic.
the class Grok method toRegex.
/**
 * Converts a grok expression into a named regex expression.
 *
 * <p>The first grok reference found in {@code grokPattern} is replaced by the definition
 * looked up in the pattern bank, and the method then recurses on the replacement plus the
 * remaining text until no reference is left.
 *
 * @param grokPattern the grok expression to expand
 * @return named regex expression; {@code grokPattern} itself when it contains no grok reference
 * @throws IllegalArgumentException if the expression references a pattern name that is not
 *         present in the pattern bank
 */
public String toRegex(String grokPattern) {
    byte[] grokPatternBytes = grokPattern.getBytes(StandardCharsets.UTF_8);
    Matcher matcher = GROK_PATTERN_REGEX.matcher(grokPatternBytes);
    int result = matcher.search(0, grokPatternBytes.length, Option.NONE);
    if (result != -1) {
        Region region = matcher.getEagerRegion();
        String namedPatternRef = groupMatch(NAME_GROUP, region, grokPattern);
        String subName = groupMatch(SUBNAME_GROUP, region, grokPattern);
        // TODO(tal): Support definitions
        String definition = groupMatch(DEFINITION_GROUP, region, grokPattern);
        String patternName = groupMatch(PATTERN_GROUP, region, grokPattern);
        String pattern = patternBank.get(patternName);
        if (pattern == null) {
            // Without this check the literal text "null" would be formatted into the regex
            // and the resulting expression would silently match the wrong input.
            throw new IllegalArgumentException("Unable to find pattern [" + patternName + "] in Grok's pattern dictionary");
        }
        String grokPart;
        if (namedCaptures && subName != null) {
            grokPart = String.format(Locale.US, "(?<%s>%s)", namedPatternRef, pattern);
        } else if (!namedCaptures) {
            // The match offset is appended to the pattern name to form the group name.
            grokPart = String.format(Locale.US, "(?<%s>%s)", patternName + "_" + result, pattern);
        } else {
            grokPart = String.format(Locale.US, "(?:%s)", pattern);
        }
        // Text before the reference is already plain regex; the expansion and everything
        // after it may still contain references, so only that part is expanded further.
        String start = new String(grokPatternBytes, 0, result, StandardCharsets.UTF_8);
        String rest = new String(grokPatternBytes, region.end[0], grokPatternBytes.length - region.end[0], StandardCharsets.UTF_8);
        return start + toRegex(grokPart + rest);
    }
    return grokPattern;
}
use of org.joni.Matcher in project elasticsearch by elastic.
the class Grok method match.
/**
 * Checks whether a specific text matches the defined grok expression.
 *
 * @param text the string to match
 * @return true if grok expression matches text, false otherwise.
 */
public boolean match(String text) {
    byte[] textAsBytes = text.getBytes(StandardCharsets.UTF_8);
    Matcher matcher = compiledExpression.matcher(textAsBytes);
    // The search range is a byte offset into the UTF-8 array; text.length() counts UTF-16
    // code units and would cut the range short whenever the text has multi-byte characters.
    int result = matcher.search(0, textAsBytes.length, Option.DEFAULT);
    return result != -1;
}
use of org.joni.Matcher in project es6draft by anba.
the class JoniRegExpMatcher method matcher.
/**
 * Creates a match state for the given input, re-encoding the string only when it is not
 * the same {@code String} instance as the one used on the previous call.
 *
 * @param s the input string to match against
 * @return a new {@code JoniMatchState} wrapping a Joni matcher over the encoded input
 */
@Override
public JoniMatchState matcher(String s) {
    final UEncoding encoding = getEncoding();
    // Identity (not equals) check: a cheap test for "same input object as last time".
    if (lastInput != s) {
        lastInput = s;
        lastInputBytes = encoding.toBytes(s);
    }
    // The final minLength() bytes are left out of the match range.
    // NOTE(review): presumably toBytes appends a terminator of that size; confirm in UEncoding.
    final int matchEnd = lastInputBytes.length - encoding.minLength();
    final Matcher joniMatcher = getPattern().matcher(lastInputBytes, 0, matchEnd);
    return new JoniMatchState(encoding, joniMatcher, s, negativeLAGroups);
}
Aggregations