use of io.prestosql.spi.function.Description in project hetu-core by openlookeng.
the class StringFunctions method replace.
@Description("greedily replaces occurrences of a pattern with a string")
@ScalarFunction
@LiteralParameters({ "x", "y", "z", "u" })
@Constraint(variable = "u", expression = "min(2147483647, x + z * (x + 1))")
@SqlType("varchar(u)")
public static Slice replace(@SqlType("varchar(x)") Slice str, @SqlType("varchar(y)") Slice search, @SqlType("varchar(z)") Slice replace) {
// Empty search?
if (search.length() == 0) {
// With empty `search` we insert `replace` in front of every character and and the end
Slice buffer = Slices.allocate((countCodePoints(str) + 1) * replace.length() + str.length());
// Always start with replace
buffer.setBytes(0, replace);
int indexBuffer = replace.length();
// After every code point insert `replace`
int index = 0;
while (index < str.length()) {
int codePointLength = lengthOfCodePointSafe(str, index);
// Append current code point
buffer.setBytes(indexBuffer, str, index, codePointLength);
indexBuffer += codePointLength;
// Append `replace`
buffer.setBytes(indexBuffer, replace);
indexBuffer += replace.length();
// Advance pointer to current code point
index += codePointLength;
}
return buffer;
}
// Allocate a reasonable buffer
Slice buffer = Slices.allocate(str.length());
int index = 0;
int indexBuffer = 0;
while (index < str.length()) {
int matchIndex = str.indexOf(search, index);
// Found a match?
if (matchIndex < 0) {
// No match found so copy the rest of string
int bytesToCopy = str.length() - index;
buffer = Slices.ensureSize(buffer, indexBuffer + bytesToCopy);
buffer.setBytes(indexBuffer, str, index, bytesToCopy);
indexBuffer += bytesToCopy;
break;
}
int bytesToCopy = matchIndex - index;
buffer = Slices.ensureSize(buffer, indexBuffer + bytesToCopy + replace.length());
// Non empty match?
if (bytesToCopy > 0) {
buffer.setBytes(indexBuffer, str, index, bytesToCopy);
indexBuffer += bytesToCopy;
}
// Non empty replace?
if (replace.length() > 0) {
buffer.setBytes(indexBuffer, replace);
indexBuffer += replace.length();
}
// Continue searching after match
index = matchIndex + search.length();
}
return buffer.slice(0, indexBuffer);
}
use of io.prestosql.spi.function.Description in project hetu-core by openlookeng.
the class StringFunctions method splitPart.
@SqlNullable
@Description("splits a string by a delimiter and returns the specified field (counting from one)")
@ScalarFunction
@LiteralParameters({ "x", "y" })
@SqlType("varchar(x)")
public static Slice splitPart(@SqlType("varchar(x)") Slice string, @SqlType("varchar(y)") Slice delimiter, @SqlType(StandardTypes.BIGINT) long index) {
checkCondition(index > 0, INVALID_FUNCTION_ARGUMENT, "Index must be greater than zero");
// Empty delimiter? Then every character will be a split
if (delimiter.length() == 0) {
int startCodePoint = toIntExact(index);
int indexStart = offsetOfCodePoint(string, startCodePoint - 1);
if (indexStart < 0) {
// index too big
return null;
}
int length = lengthOfCodePoint(string, indexStart);
if (indexStart + length > string.length()) {
throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Invalid UTF-8 encoding");
}
return string.slice(indexStart, length);
}
int matchCount = 0;
int previousIndex = 0;
while (previousIndex < string.length()) {
int matchIndex = string.indexOf(delimiter, previousIndex);
// No match
if (matchIndex < 0) {
break;
}
// Reached the requested part?
if (++matchCount == index) {
return string.slice(previousIndex, matchIndex - previousIndex);
}
// Continue searching after the delimiter
previousIndex = matchIndex + delimiter.length();
}
if (matchCount == index - 1) {
// returns last section of the split
return string.slice(previousIndex, string.length() - previousIndex);
}
// index is too big, null is returned
return null;
}
use of io.prestosql.spi.function.Description in project hetu-core by openlookeng.
the class StringFunctions method levenshteinDistance.
@Description("computes Levenshtein distance between two strings")
@ScalarFunction
@LiteralParameters({ "x", "y" })
@SqlType(StandardTypes.BIGINT)
public static long levenshteinDistance(@SqlType("varchar(x)") Slice left, @SqlType("varchar(y)") Slice right) {
int[] leftCodePoints = castToCodePoints(left);
int[] rightCodePoints = castToCodePoints(right);
if (leftCodePoints.length < rightCodePoints.length) {
int[] tempCodePoints = leftCodePoints;
leftCodePoints = rightCodePoints;
rightCodePoints = tempCodePoints;
}
if (rightCodePoints.length == 0) {
return leftCodePoints.length;
}
checkCondition((leftCodePoints.length * (rightCodePoints.length - 1)) <= 1_000_000, INVALID_FUNCTION_ARGUMENT, "The combined inputs for Levenshtein distance are too large");
int[] distances = new int[rightCodePoints.length];
for (int i = 0; i < rightCodePoints.length; i++) {
distances[i] = i + 1;
}
for (int i = 0; i < leftCodePoints.length; i++) {
int leftUpDistance = distances[0];
if (leftCodePoints[i] == rightCodePoints[0]) {
distances[0] = i;
} else {
distances[0] = Math.min(i, distances[0]) + 1;
}
for (int j = 1; j < rightCodePoints.length; j++) {
int leftUpDistanceNext = distances[j];
if (leftCodePoints[i] == rightCodePoints[j]) {
distances[j] = leftUpDistance;
} else {
distances[j] = Math.min(distances[j - 1], Math.min(leftUpDistance, distances[j])) + 1;
}
leftUpDistance = leftUpDistanceNext;
}
}
return distances[rightCodePoints.length - 1];
}
use of io.prestosql.spi.function.Description in project hetu-core by openlookeng.
the class VarbinaryFunctions method toIEEE754Binary32.
@Description("encode value as a big endian varbinary according to IEEE 754 single-precision floating-point format")
@ScalarFunction("to_ieee754_32")
@SqlType(StandardTypes.VARBINARY)
public static Slice toIEEE754Binary32(@SqlType(StandardTypes.REAL) long value) {
Slice slice = Slices.allocate(Float.BYTES);
slice.setInt(0, Integer.reverseBytes((int) value));
return slice;
}
use of io.prestosql.spi.function.Description in project hetu-core by openlookeng.
the class VarbinaryFunctions method toBigEndian32.
@Description("encode value as a 32-bit 2's complement big endian varbinary")
@ScalarFunction("to_big_endian_32")
@SqlType(StandardTypes.VARBINARY)
public static Slice toBigEndian32(@SqlType(StandardTypes.INTEGER) long value) {
Slice slice = Slices.allocate(Integer.BYTES);
slice.setInt(0, Integer.reverseBytes((int) value));
return slice;
}
Aggregations