use of io.trino.spi.function.ScalarFunction in project trino by trinodb.
the class VarbinaryFunctions method crc32.
/**
 * Computes the CRC-32 checksum over the bytes of a varbinary value.
 *
 * @param slice the bytes to checksum
 * @return the value reported by {@link CRC32#getValue()} after feeding it the slice contents
 */
@Description("Compute CRC-32")
@ScalarFunction
@SqlType(StandardTypes.BIGINT)
public static long crc32(@SqlType(StandardTypes.VARBINARY) Slice slice) {
    CRC32 checksum = new CRC32();
    // Feed the slice contents through its ByteBuffer view.
    checksum.update(slice.toByteBuffer());
    return checksum.getValue();
}
use of io.trino.spi.function.ScalarFunction in project trino by trinodb.
the class VarbinaryFunctions method reverse.
/**
 * Produces a new slice containing the bytes of the input in reverse order.
 *
 * @param inputSlice the bytes to reverse
 * @return {@code EMPTY_SLICE} for an empty input, otherwise a freshly allocated
 *         slice of the same length with the byte order reversed
 */
@Description("Reverse a given varbinary")
@ScalarFunction("reverse")
@SqlType(StandardTypes.VARBINARY)
public static Slice reverse(@SqlType("varbinary") Slice inputSlice) {
    int size = inputSlice.length();
    if (size == 0) {
        return EMPTY_SLICE;
    }

    Slice reversed = Slices.allocate(size);
    // Walk the input from its last byte to its first while filling the output front to back.
    for (int source = size - 1, target = 0; source >= 0; source--, target++) {
        reversed.setByte(target, inputSlice.getByte(source));
    }
    return reversed;
}
use of io.trino.spi.function.ScalarFunction in project trino by trinodb.
the class StringFunctions method substring.
/**
 * Returns the part of {@code utf8} that begins at code point position {@code start}
 * and spans at most {@code length} code points.
 *
 * <p>Positions are 1-based. A negative {@code start} is counted from the end of the
 * string. Both {@code start} and {@code length} are saturated to the {@code int} range
 * before use.
 *
 * @param utf8 the UTF-8 encoded string to take the substring from
 * @param start 1-based starting code point; negative values are relative to the end
 * @param length maximum number of code points to return
 * @return the requested substring; the empty slice when {@code start} is zero,
 *         {@code length} is not positive, the input is empty, or the start position
 *         falls outside the string
 */
@Description("Substring of given length starting at an index")
@ScalarFunction(alias = "substr")
@LiteralParameters("x")
@SqlType("varchar(x)")
public static Slice substring(@SqlType("varchar(x)") Slice utf8, @SqlType(StandardTypes.BIGINT) long start, @SqlType(StandardTypes.BIGINT) long length) {
    if (start == 0 || (length <= 0) || (utf8.length() == 0)) {
        return Slices.EMPTY_SLICE;
    }

    int startCodePoint = Ints.saturatedCast(start);
    int lengthCodePoints = Ints.saturatedCast(length);

    if (startCodePoint > 0) {
        int indexStart = offsetOfCodePoint(utf8, startCodePoint - 1);
        if (indexStart < 0) {
            // no such code point position in the string
            return Slices.EMPTY_SLICE;
        }

        int indexEnd = offsetOfCodePoint(utf8, indexStart, lengthCodePoints);
        if (indexEnd < 0) {
            // after end of string
            indexEnd = utf8.length();
        }

        return utf8.slice(indexStart, indexEnd - indexStart);
    }

    // negative start is relative to end of string
    int codePoints = countCodePoints(utf8);
    startCodePoint += codePoints;

    // before beginning of string
    if (startCodePoint < 0) {
        return Slices.EMPTY_SLICE;
    }

    int indexStart = offsetOfCodePoint(utf8, startCodePoint);
    int indexEnd;
    // Here 0 <= startCodePoint < codePoints, so the subtraction cannot overflow.
    // The previous form (startCodePoint + lengthCodePoints < codePoints) wrapped to a
    // negative value when lengthCodePoints was saturated to Integer.MAX_VALUE, which
    // sent an out-of-range request to offsetOfCodePoint and produced a bad slice length.
    if (lengthCodePoints < codePoints - startCodePoint) {
        indexEnd = offsetOfCodePoint(utf8, indexStart, lengthCodePoints);
    } else {
        indexEnd = utf8.length();
    }

    return utf8.slice(indexStart, indexEnd - indexStart);
}
use of io.trino.spi.function.ScalarFunction in project trino by trinodb.
the class StringFunctions method levenshteinDistance.
/**
 * Computes the Levenshtein (edit) distance between two strings, measured in code points.
 *
 * <p>The inputs are swapped if needed so that {@code rightCodePoints} is the shorter one;
 * the working array is then sized by the shorter input. The array is updated in place, one
 * pass per code point of the longer input.
 *
 * @param left the first string
 * @param right the second string
 * @return the number of single code point insertions, deletions and substitutions
 *         needed to turn one string into the other
 * @throws TrinoException presumably (via {@code checkCondition}, defined outside this
 *         excerpt) with {@code INVALID_FUNCTION_ARGUMENT} when the product of the input
 *         sizes exceeds the allowed limit
 */
@Description("Computes Levenshtein distance between two strings")
@ScalarFunction
@LiteralParameters({ "x", "y" })
@SqlType(StandardTypes.BIGINT)
public static long levenshteinDistance(@SqlType("varchar(x)") Slice left, @SqlType("varchar(y)") Slice right) {
    int[] leftCodePoints = castToCodePoints(left);
    int[] rightCodePoints = castToCodePoints(right);

    // Keep the shorter input on the right so the working array stays small.
    if (leftCodePoints.length < rightCodePoints.length) {
        int[] tempCodePoints = leftCodePoints;
        leftCodePoints = rightCodePoints;
        rightCodePoints = tempCodePoints;
    }

    if (rightCodePoints.length == 0) {
        return leftCodePoints.length;
    }

    // Multiply in long: an int product wraps to a negative number for large inputs
    // and would slip past the size limit.
    long workSize = (long) leftCodePoints.length * (rightCodePoints.length - 1);
    checkCondition(workSize <= 1_000_000, INVALID_FUNCTION_ARGUMENT, "The combined inputs for Levenshtein distance are too large");

    // distances[j] starts as the distance from the empty prefix of left to right[0..j].
    int[] distances = new int[rightCodePoints.length];
    for (int i = 0; i < rightCodePoints.length; i++) {
        distances[i] = i + 1;
    }

    for (int i = 0; i < leftCodePoints.length; i++) {
        // Value of the previous row at column j - 1 (the "diagonal" neighbour).
        int leftUpDistance = distances[0];
        if (leftCodePoints[i] == rightCodePoints[0]) {
            distances[0] = i;
        } else {
            distances[0] = Math.min(i, distances[0]) + 1;
        }
        for (int j = 1; j < rightCodePoints.length; j++) {
            // Save the previous-row value before it is overwritten; it is the diagonal for j + 1.
            int leftUpDistanceNext = distances[j];
            if (leftCodePoints[i] == rightCodePoints[j]) {
                distances[j] = leftUpDistance;
            } else {
                distances[j] = Math.min(distances[j - 1], Math.min(leftUpDistance, distances[j])) + 1;
            }
            leftUpDistance = leftUpDistanceNext;
        }
    }

    return distances[rightCodePoints.length - 1];
}
use of io.trino.spi.function.ScalarFunction in project trino by trinodb.
the class StringFunctions method translate.
/**
 * Rewrites {@code source} code point by code point using a mapping built from
 * {@code from} and {@code to}.
 *
 * <p>The code point at index {@code i} of {@code from} maps to the code point at index
 * {@code i} of {@code to}. When {@code from} is longer than {@code to}, the unmatched
 * code points are removed from the output. If a code point occurs more than once in
 * {@code from}, only its first occurrence defines the mapping.
 *
 * @param source the string to translate
 * @param from the code points to look for
 * @param to the replacement code points, matched to {@code from} by position
 * @return a newly allocated slice holding the translated string
 */
@Description("Translate characters from the source string based on original and translations strings")
@ScalarFunction
@LiteralParameters({ "x", "y", "z" })
@SqlType(StandardTypes.VARCHAR)
public static Slice translate(@SqlType("varchar(x)") Slice source, @SqlType("varchar(y)") Slice from, @SqlType("varchar(z)") Slice to) {
    int[] originals = castToCodePoints(from);
    int[] replacements = castToCodePoints(to);

    // -1 marks a code point that has no replacement and must be dropped.
    Int2IntOpenHashMap translations = new Int2IntOpenHashMap(originals.length);
    for (int i = 0; i < originals.length; i++) {
        int replacement = -1;
        if (i < replacements.length) {
            replacement = replacements[i];
        }
        translations.putIfAbsent(originals[i], replacement);
    }

    int[] input = castToCodePoints(source);
    int[] output = new int[input.length];
    int outputCount = 0;
    int outputBytes = 0;
    for (int current : input) {
        int emitted = current;
        if (translations.containsKey(current)) {
            int mapped = translations.get(current);
            if (mapped == -1) {
                continue;
            }
            emitted = mapped;
        }
        output[outputCount] = emitted;
        outputCount++;
        outputBytes += lengthOfCodePoint(emitted);
    }

    // Encode the surviving code points into a slice of exactly the required size.
    Slice result = Slices.allocate(outputBytes);
    int position = 0;
    int written = 0;
    while (written < outputCount) {
        position += setCodePointAt(output[written], result, position);
        written++;
    }
    return result;
}
Aggregations