use of io.airlift.joni.Matcher in project presto by prestodb.
the class JoniRegexpFunctions method regexpSplit.
@ScalarFunction
@LiteralParameters("x")
@Description("returns array of strings split by pattern")
@SqlType("array(varchar(x))")
public static Block regexpSplit(@SqlType("varchar(x)") Slice source, @SqlType(JoniRegexpType.NAME) Regex pattern) {
Matcher matcher = pattern.matcher(source.getBytes());
BlockBuilder blockBuilder = VARCHAR.createBlockBuilder(new BlockBuilderStatus(), 32);
int lastEnd = 0;
int nextStart = 0;
while (true) {
int offset = matcher.search(nextStart, source.length(), Option.DEFAULT);
if (offset == -1) {
break;
}
if (matcher.getEnd() == matcher.getBegin()) {
nextStart = matcher.getEnd() + 1;
} else {
nextStart = matcher.getEnd();
}
Slice slice = source.slice(lastEnd, matcher.getBegin() - lastEnd);
lastEnd = matcher.getEnd();
VARCHAR.writeSlice(blockBuilder, slice);
}
VARCHAR.writeSlice(blockBuilder, source.slice(lastEnd, source.length() - lastEnd));
return blockBuilder.build();
}
use of io.airlift.joni.Matcher in project presto by prestodb.
the class JoniRegexpFunctions method regexpExtract.
@SqlNullable
@Description("returns regex group of extracted string with a pattern")
@ScalarFunction
@LiteralParameters("x")
@SqlType("varchar(x)")
public static Slice regexpExtract(@SqlType("varchar(x)") Slice source, @SqlType(JoniRegexpType.NAME) Regex pattern, @SqlType(StandardTypes.BIGINT) long groupIndex) {
Matcher matcher = pattern.matcher(source.getBytes());
validateGroup(groupIndex, matcher.getEagerRegion());
int group = toIntExact(groupIndex);
int offset = matcher.search(0, source.length(), Option.DEFAULT);
if (offset == -1) {
return null;
}
Region region = matcher.getEagerRegion();
int beg = region.beg[group];
int end = region.end[group];
if (beg == -1) {
// end == -1 must be true
return null;
}
Slice slice = source.slice(beg, end - beg);
return slice;
}
use of io.airlift.joni.Matcher in project presto by prestodb.
the class JoniRegexpFunctions method regexpReplace.
@Description("replaces substrings matching a regular expression by given string")
@ScalarFunction
@LiteralParameters({ "x", "y", "z" })
// to get the formula: x + max(x * y / 2, y) * (x + 1)
@Constraint(variable = "z", expression = "min(2147483647, x + max(x * y / 2, y) * (x + 1))")
@SqlType("varchar(z)")
public static Slice regexpReplace(@SqlType("varchar(x)") Slice source, @SqlType(JoniRegexpType.NAME) Regex pattern, @SqlType("varchar(y)") Slice replacement) {
Matcher matcher = pattern.matcher(source.getBytes());
SliceOutput sliceOutput = new DynamicSliceOutput(source.length() + replacement.length() * 5);
int lastEnd = 0;
// nextStart is the same as lastEnd, unless the last match was zero-width. In such case, nextStart is lastEnd + 1.
int nextStart = 0;
while (true) {
int offset = matcher.search(nextStart, source.length(), Option.DEFAULT);
if (offset == -1) {
break;
}
if (matcher.getEnd() == matcher.getBegin()) {
nextStart = matcher.getEnd() + 1;
} else {
nextStart = matcher.getEnd();
}
Slice sliceBetweenReplacements = source.slice(lastEnd, matcher.getBegin() - lastEnd);
lastEnd = matcher.getEnd();
sliceOutput.appendBytes(sliceBetweenReplacements);
appendReplacement(sliceOutput, source, pattern, matcher.getEagerRegion(), replacement);
}
sliceOutput.appendBytes(source.slice(lastEnd, source.length() - lastEnd));
return sliceOutput.slice();
}
use of io.airlift.joni.Matcher in project presto by prestodb.
the class JoniRegexpFunctions method regexpExtractAll.
@Description("group(s) extracted using the given pattern")
@ScalarFunction
@LiteralParameters("x")
@SqlType("array(varchar(x))")
public static Block regexpExtractAll(@SqlType("varchar(x)") Slice source, @SqlType(JoniRegexpType.NAME) Regex pattern, @SqlType(StandardTypes.BIGINT) long groupIndex) {
Matcher matcher = pattern.matcher(source.getBytes());
validateGroup(groupIndex, matcher.getEagerRegion());
BlockBuilder blockBuilder = VARCHAR.createBlockBuilder(new BlockBuilderStatus(), 32);
int group = toIntExact(groupIndex);
int nextStart = 0;
while (true) {
int offset = matcher.search(nextStart, source.length(), Option.DEFAULT);
if (offset == -1) {
break;
}
if (matcher.getEnd() == matcher.getBegin()) {
nextStart = matcher.getEnd() + 1;
} else {
nextStart = matcher.getEnd();
}
Region region = matcher.getEagerRegion();
int beg = region.beg[group];
int end = region.end[group];
if (beg == -1 || end == -1) {
blockBuilder.appendNull();
} else {
Slice slice = source.slice(beg, end - beg);
VARCHAR.writeSlice(blockBuilder, slice);
}
}
return blockBuilder.build();
}
use of io.airlift.joni.Matcher in project presto by prestodb.
the class JoniRegexpFunctions method regexpLike.
@Description("returns whether the pattern is contained within the string")
@ScalarFunction
@LiteralParameters("x")
@SqlType(StandardTypes.BOOLEAN)
public static boolean regexpLike(@SqlType("varchar(x)") Slice source, @SqlType(JoniRegexpType.NAME) Regex pattern) {
Matcher m = pattern.matcher(source.getBytes());
int offset = m.search(0, source.length(), Option.DEFAULT);
return offset != -1;
}
Aggregations