use of io.prestosql.spi.function.LiteralParameters in project hetu-core by openlookeng.
the class StringFunctions method levenshteinDistance.
@Description("computes Levenshtein distance between two strings")
@ScalarFunction
@LiteralParameters({ "x", "y" })
@SqlType(StandardTypes.BIGINT)
public static long levenshteinDistance(@SqlType("varchar(x)") Slice left, @SqlType("varchar(y)") Slice right) {
int[] leftCodePoints = castToCodePoints(left);
int[] rightCodePoints = castToCodePoints(right);
if (leftCodePoints.length < rightCodePoints.length) {
int[] tempCodePoints = leftCodePoints;
leftCodePoints = rightCodePoints;
rightCodePoints = tempCodePoints;
}
if (rightCodePoints.length == 0) {
return leftCodePoints.length;
}
checkCondition((leftCodePoints.length * (rightCodePoints.length - 1)) <= 1_000_000, INVALID_FUNCTION_ARGUMENT, "The combined inputs for Levenshtein distance are too large");
int[] distances = new int[rightCodePoints.length];
for (int i = 0; i < rightCodePoints.length; i++) {
distances[i] = i + 1;
}
for (int i = 0; i < leftCodePoints.length; i++) {
int leftUpDistance = distances[0];
if (leftCodePoints[i] == rightCodePoints[0]) {
distances[0] = i;
} else {
distances[0] = Math.min(i, distances[0]) + 1;
}
for (int j = 1; j < rightCodePoints.length; j++) {
int leftUpDistanceNext = distances[j];
if (leftCodePoints[i] == rightCodePoints[j]) {
distances[j] = leftUpDistance;
} else {
distances[j] = Math.min(distances[j - 1], Math.min(leftUpDistance, distances[j])) + 1;
}
leftUpDistance = leftUpDistanceNext;
}
}
return distances[rightCodePoints.length - 1];
}
use of io.prestosql.spi.function.LiteralParameters in project hetu-core by openlookeng.
the class StringFunctions method substr.
@Description("substring of given length starting at an index")
@ScalarFunction
@LiteralParameters("x")
@SqlType("varchar(x)")
public static Slice substr(@SqlType("varchar(x)") Slice utf8, @SqlType(StandardTypes.BIGINT) long start, @SqlType(StandardTypes.BIGINT) long length) {
if (start == 0 || (length <= 0) || (utf8.length() == 0)) {
return Slices.EMPTY_SLICE;
}
int startCodePoint = Ints.saturatedCast(start);
int lengthCodePoints = Ints.saturatedCast(length);
if (startCodePoint > 0) {
int indexStart = offsetOfCodePoint(utf8, startCodePoint - 1);
if (indexStart < 0) {
// before beginning of string
return Slices.EMPTY_SLICE;
}
int indexEnd = offsetOfCodePoint(utf8, indexStart, lengthCodePoints);
if (indexEnd < 0) {
// after end of string
indexEnd = utf8.length();
}
return utf8.slice(indexStart, indexEnd - indexStart);
}
// negative start is relative to end of string
int codePoints = countCodePoints(utf8);
startCodePoint += codePoints;
// before beginning of string
if (startCodePoint < 0) {
return Slices.EMPTY_SLICE;
}
int indexStart = offsetOfCodePoint(utf8, startCodePoint);
int indexEnd;
if (startCodePoint + lengthCodePoints < codePoints) {
indexEnd = offsetOfCodePoint(utf8, indexStart, lengthCodePoints);
} else {
indexEnd = utf8.length();
}
return utf8.slice(indexStart, indexEnd - indexStart);
}
use of io.prestosql.spi.function.LiteralParameters in project hetu-core by openlookeng.
the class StringFunctions method fromUtf8.
@Description("decodes the UTF-8 encoded string")
@ScalarFunction
@LiteralParameters("x")
@SqlType(StandardTypes.VARCHAR)
public static Slice fromUtf8(@SqlType(StandardTypes.VARBINARY) Slice slice, @SqlType("varchar(x)") Slice replacementCharacter) {
int count = countCodePoints(replacementCharacter);
if (count > 1) {
throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Replacement character string must empty or a single character");
}
OptionalInt replacementCodePoint;
if (count == 1) {
try {
replacementCodePoint = OptionalInt.of(getCodePointAt(replacementCharacter, 0));
} catch (InvalidUtf8Exception e) {
throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Invalid replacement character");
}
} else {
replacementCodePoint = OptionalInt.empty();
}
return SliceUtf8.fixInvalidUtf8(slice, replacementCodePoint);
}
use of io.prestosql.spi.function.LiteralParameters in project hetu-core by openlookeng.
the class StringFunctions method concat.
// TODO: implement N arguments char concat
@Description("concatenates given character strings")
@ScalarFunction
@LiteralParameters({ "x", "y", "u" })
@Constraint(variable = "u", expression = "x + y")
@SqlType("char(u)")
public static Slice concat(@LiteralParameter("x") Long x, @SqlType("char(x)") Slice left, @SqlType("char(y)") Slice right) {
int rightLength = right.length();
if (rightLength == 0) {
return left;
}
Slice paddedLeft = padSpaces(left, x.intValue());
int leftLength = paddedLeft.length();
Slice result = Slices.allocate(leftLength + rightLength);
result.setBytes(0, paddedLeft);
result.setBytes(leftLength, right);
return result;
}
use of io.prestosql.spi.function.LiteralParameters in project hetu-core by openlookeng.
the class StringFunctions method hammingDistance.
@Description("computes Hamming distance between two strings")
@ScalarFunction
@LiteralParameters({ "x", "y" })
@SqlType(StandardTypes.BIGINT)
public static long hammingDistance(@SqlType("varchar(x)") Slice left, @SqlType("varchar(y)") Slice right) {
int distance = 0;
int leftPosition = 0;
int rightPosition = 0;
while (leftPosition < left.length() && rightPosition < right.length()) {
int codePointLeft = tryGetCodePointAt(left, leftPosition);
int codePointRight = tryGetCodePointAt(right, rightPosition);
// the following code treats them as equal if they happen to be of the same length
if (codePointLeft != codePointRight) {
distance++;
}
leftPosition += codePointLeft > 0 ? lengthOfCodePoint(codePointLeft) : -codePointLeft;
rightPosition += codePointRight > 0 ? lengthOfCodePoint(codePointRight) : -codePointRight;
}
checkCondition(leftPosition == left.length() && rightPosition == right.length(), INVALID_FUNCTION_ARGUMENT, "The input strings to hamming_distance function must have the same length");
return distance;
}
Aggregations