Use of io.airlift.joni.exception.ValueException in the project presto by prestodb.
From the class JoniRegexpFunctions, the method appendReplacement.
/**
 * Expands {@code replacement} for the current match described by {@code region} and
 * appends the resulting bytes to {@code result}.
 *
 * <p>Recognized replacement syntax:
 * <ol>
 * <li>{@code ${name}} - the bytes captured by the named group {@code name};</li>
 * <li>{@code $0}, {@code $1}, {@code $123} - a numbered group; for multi-digit references the
 * longest digit prefix that is a valid group number is used (group 123, if it exists;
 * otherwise group 12, if it exists; otherwise group 1);</li>
 * <li>{@code \\}, {@code \$}, {@code \t} - a backslash makes the following byte literal
 * ({@code \t} is a literal 't', not a tab);</li>
 * <li>any other byte is copied as-is.</li>
 * </ol>
 *
 * @param result output the expanded replacement is appended to
 * @param source the slice that was matched; captured groups are sliced out of it
 * @param pattern the regex, used to resolve group names to group numbers
 * @param region group boundaries of the current match
 * @param replacement the replacement template
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} if the replacement ends with a
 * dangling {@code $} or {@code \}, contains an unterminated <code>${</code>, or refers to an unknown group
 */
private static void appendReplacement(SliceOutput result, Slice source, Regex pattern, Region region, Slice replacement) {
    // Handle the following items:
    // 1. ${name};
    // 2. $0, $1, $123 (group 123, if exists; or group 12, if exists; or group 1);
    // 3. \\, \$, \t (literal 't').
    // 4. Anything that doesn't start with \ or $ is considered regular bytes
    int length = replacement.length();
    int idx = 0;
    while (idx < length) {
        byte nextByte = replacement.getByte(idx);
        if (nextByte == '$') {
            idx++;
            if (idx == length) {
                // dangling '$' at the end of the replacement
                // not using checkArgument because `.toStringUtf8` is expensive
                throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: " + replacement.toStringUtf8());
            }
            nextByte = replacement.getByte(idx);
            int backref;
            if (nextByte == '{') {
                // case 1 in the above comment
                idx++;
                int startCursor = idx;
                while (idx < length && replacement.getByte(idx) != '}') {
                    idx++;
                }
                if (idx == length) {
                    // "${name" without the closing '}' is malformed; reject it instead of
                    // silently treating the rest of the replacement as the group name
                    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: " + replacement.toStringUtf8());
                }
                byte[] groupName = replacement.getBytes(startCursor, idx - startCursor);
                try {
                    backref = pattern.nameToBackrefNumber(groupName, 0, groupName.length, region);
                } catch (ValueException e) {
                    // keep the original exception as the cause for easier debugging
                    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: unknown group { " + new String(groupName, StandardCharsets.UTF_8) + " }", e);
                }
                // step over the closing '}'
                idx++;
            } else {
                // case 2 in the above comment
                backref = nextByte - '0';
                if (backref < 0 || backref > 9) {
                    // '$' must be followed by '{' or a digit
                    // not using checkArgument because `.toStringUtf8` is expensive
                    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: " + replacement.toStringUtf8());
                }
                if (region.numRegs <= backref) {
                    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: unknown group " + backref);
                }
                idx++;
                while (idx < length) {
                    // Adaptive group number: find largest group num that is not greater than actual number of groups
                    int nextDigit = replacement.getByte(idx) - '0';
                    if (nextDigit < 0 || nextDigit > 9) {
                        break;
                    }
                    int newBackref = (backref * 10) + nextDigit;
                    if (region.numRegs <= newBackref) {
                        // consuming this digit would name a non-existent group; leave it as a literal
                        break;
                    }
                    backref = newBackref;
                    idx++;
                }
            }
            int beg = region.beg[backref];
            int end = region.end[backref];
            // -1 boundaries mean the group doesn't exist in the current match; append nothing for it
            if (beg != -1 && end != -1) {
                result.appendBytes(source.slice(beg, end - beg));
            }
        } else {
            // case 3 and 4 in the above comment
            if (nextByte == '\\') {
                idx++;
                if (idx == length) {
                    // dangling '\' at the end of the replacement
                    // not using checkArgument because `.toStringUtf8` is expensive
                    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: " + replacement.toStringUtf8());
                }
                // the escaped byte is emitted literally
                nextByte = replacement.getByte(idx);
            }
            result.appendByte(nextByte);
            idx++;
        }
    }
}
Aggregations