Search in sources :

Example 11 with LiteralParameters

use of io.prestosql.spi.function.LiteralParameters in project hetu-core by openlookeng.

the class StringFunctions method levenshteinDistance.

@Description("computes Levenshtein distance between two strings")
@ScalarFunction
@LiteralParameters({ "x", "y" })
@SqlType(StandardTypes.BIGINT)
public static long levenshteinDistance(@SqlType("varchar(x)") Slice left, @SqlType("varchar(y)") Slice right) {
    int[] leftCodePoints = castToCodePoints(left);
    int[] rightCodePoints = castToCodePoints(right);
    if (leftCodePoints.length < rightCodePoints.length) {
        int[] tempCodePoints = leftCodePoints;
        leftCodePoints = rightCodePoints;
        rightCodePoints = tempCodePoints;
    }
    if (rightCodePoints.length == 0) {
        return leftCodePoints.length;
    }
    checkCondition((leftCodePoints.length * (rightCodePoints.length - 1)) <= 1_000_000, INVALID_FUNCTION_ARGUMENT, "The combined inputs for Levenshtein distance are too large");
    int[] distances = new int[rightCodePoints.length];
    for (int i = 0; i < rightCodePoints.length; i++) {
        distances[i] = i + 1;
    }
    for (int i = 0; i < leftCodePoints.length; i++) {
        int leftUpDistance = distances[0];
        if (leftCodePoints[i] == rightCodePoints[0]) {
            distances[0] = i;
        } else {
            distances[0] = Math.min(i, distances[0]) + 1;
        }
        for (int j = 1; j < rightCodePoints.length; j++) {
            int leftUpDistanceNext = distances[j];
            if (leftCodePoints[i] == rightCodePoints[j]) {
                distances[j] = leftUpDistance;
            } else {
                distances[j] = Math.min(distances[j - 1], Math.min(leftUpDistance, distances[j])) + 1;
            }
            leftUpDistance = leftUpDistanceNext;
        }
    }
    return distances[rightCodePoints.length - 1];
}
Also used : Constraint(io.prestosql.type.Constraint) SliceUtf8.lengthOfCodePoint(io.airlift.slice.SliceUtf8.lengthOfCodePoint) SliceUtf8.offsetOfCodePoint(io.airlift.slice.SliceUtf8.offsetOfCodePoint) ScalarFunction(io.prestosql.spi.function.ScalarFunction) Description(io.prestosql.spi.function.Description) LiteralParameters(io.prestosql.spi.function.LiteralParameters) SqlType(io.prestosql.spi.function.SqlType)

Example 12 with LiteralParameters

use of io.prestosql.spi.function.LiteralParameters in project hetu-core by openlookeng.

the class StringFunctions method substr.

@Description("substring of given length starting at an index")
@ScalarFunction
@LiteralParameters("x")
@SqlType("varchar(x)")
public static Slice substr(@SqlType("varchar(x)") Slice utf8, @SqlType(StandardTypes.BIGINT) long start, @SqlType(StandardTypes.BIGINT) long length) {
    if (start == 0 || (length <= 0) || (utf8.length() == 0)) {
        return Slices.EMPTY_SLICE;
    }
    int startCodePoint = Ints.saturatedCast(start);
    int lengthCodePoints = Ints.saturatedCast(length);
    if (startCodePoint > 0) {
        int indexStart = offsetOfCodePoint(utf8, startCodePoint - 1);
        if (indexStart < 0) {
            // before beginning of string
            return Slices.EMPTY_SLICE;
        }
        int indexEnd = offsetOfCodePoint(utf8, indexStart, lengthCodePoints);
        if (indexEnd < 0) {
            // after end of string
            indexEnd = utf8.length();
        }
        return utf8.slice(indexStart, indexEnd - indexStart);
    }
    // negative start is relative to end of string
    int codePoints = countCodePoints(utf8);
    startCodePoint += codePoints;
    // before beginning of string
    if (startCodePoint < 0) {
        return Slices.EMPTY_SLICE;
    }
    int indexStart = offsetOfCodePoint(utf8, startCodePoint);
    int indexEnd;
    if (startCodePoint + lengthCodePoints < codePoints) {
        indexEnd = offsetOfCodePoint(utf8, indexStart, lengthCodePoints);
    } else {
        indexEnd = utf8.length();
    }
    return utf8.slice(indexStart, indexEnd - indexStart);
}
Also used : Constraint(io.prestosql.type.Constraint) SliceUtf8.lengthOfCodePoint(io.airlift.slice.SliceUtf8.lengthOfCodePoint) SliceUtf8.offsetOfCodePoint(io.airlift.slice.SliceUtf8.offsetOfCodePoint) ScalarFunction(io.prestosql.spi.function.ScalarFunction) Description(io.prestosql.spi.function.Description) LiteralParameters(io.prestosql.spi.function.LiteralParameters) SqlType(io.prestosql.spi.function.SqlType)

Example 13 with LiteralParameters

use of io.prestosql.spi.function.LiteralParameters in project hetu-core by openlookeng.

the class StringFunctions method fromUtf8.

@Description("decodes the UTF-8 encoded string")
@ScalarFunction
@LiteralParameters("x")
@SqlType(StandardTypes.VARCHAR)
public static Slice fromUtf8(@SqlType(StandardTypes.VARBINARY) Slice slice, @SqlType("varchar(x)") Slice replacementCharacter) {
    int count = countCodePoints(replacementCharacter);
    if (count > 1) {
        throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Replacement character string must empty or a single character");
    }
    OptionalInt replacementCodePoint;
    if (count == 1) {
        try {
            replacementCodePoint = OptionalInt.of(getCodePointAt(replacementCharacter, 0));
        } catch (InvalidUtf8Exception e) {
            throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Invalid replacement character");
        }
    } else {
        replacementCodePoint = OptionalInt.empty();
    }
    return SliceUtf8.fixInvalidUtf8(slice, replacementCodePoint);
}
Also used : InvalidUtf8Exception(io.airlift.slice.InvalidUtf8Exception) PrestoException(io.prestosql.spi.PrestoException) OptionalInt(java.util.OptionalInt) Constraint(io.prestosql.type.Constraint) SliceUtf8.lengthOfCodePoint(io.airlift.slice.SliceUtf8.lengthOfCodePoint) SliceUtf8.offsetOfCodePoint(io.airlift.slice.SliceUtf8.offsetOfCodePoint) ScalarFunction(io.prestosql.spi.function.ScalarFunction) Description(io.prestosql.spi.function.Description) LiteralParameters(io.prestosql.spi.function.LiteralParameters) SqlType(io.prestosql.spi.function.SqlType)

Example 14 with LiteralParameters

use of io.prestosql.spi.function.LiteralParameters in project hetu-core by openlookeng.

the class StringFunctions method concat.

// TODO: implement N arguments char concat
@Description("concatenates given character strings")
@ScalarFunction
@LiteralParameters({ "x", "y", "u" })
@Constraint(variable = "u", expression = "x + y")
@SqlType("char(u)")
public static Slice concat(@LiteralParameter("x") Long x, @SqlType("char(x)") Slice left, @SqlType("char(y)") Slice right) {
    int rightLength = right.length();
    if (rightLength == 0) {
        return left;
    }
    Slice paddedLeft = padSpaces(left, x.intValue());
    int leftLength = paddedLeft.length();
    Slice result = Slices.allocate(leftLength + rightLength);
    result.setBytes(0, paddedLeft);
    result.setBytes(leftLength, right);
    return result;
}
Also used : Slice(io.airlift.slice.Slice) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Constraint(io.prestosql.type.Constraint) SliceUtf8.lengthOfCodePoint(io.airlift.slice.SliceUtf8.lengthOfCodePoint) SliceUtf8.offsetOfCodePoint(io.airlift.slice.SliceUtf8.offsetOfCodePoint) ScalarFunction(io.prestosql.spi.function.ScalarFunction) Description(io.prestosql.spi.function.Description) LiteralParameters(io.prestosql.spi.function.LiteralParameters) Constraint(io.prestosql.type.Constraint) SqlType(io.prestosql.spi.function.SqlType)

Example 15 with LiteralParameters

use of io.prestosql.spi.function.LiteralParameters in project hetu-core by openlookeng.

the class StringFunctions method hammingDistance.

@Description("computes Hamming distance between two strings")
@ScalarFunction
@LiteralParameters({ "x", "y" })
@SqlType(StandardTypes.BIGINT)
public static long hammingDistance(@SqlType("varchar(x)") Slice left, @SqlType("varchar(y)") Slice right) {
    int distance = 0;
    int leftPosition = 0;
    int rightPosition = 0;
    while (leftPosition < left.length() && rightPosition < right.length()) {
        int codePointLeft = tryGetCodePointAt(left, leftPosition);
        int codePointRight = tryGetCodePointAt(right, rightPosition);
        // the following code treats them as equal if they happen to be of the same length
        if (codePointLeft != codePointRight) {
            distance++;
        }
        leftPosition += codePointLeft > 0 ? lengthOfCodePoint(codePointLeft) : -codePointLeft;
        rightPosition += codePointRight > 0 ? lengthOfCodePoint(codePointRight) : -codePointRight;
    }
    checkCondition(leftPosition == left.length() && rightPosition == right.length(), INVALID_FUNCTION_ARGUMENT, "The input strings to hamming_distance function must have the same length");
    return distance;
}
Also used : Constraint(io.prestosql.type.Constraint) SliceUtf8.lengthOfCodePoint(io.airlift.slice.SliceUtf8.lengthOfCodePoint) SliceUtf8.offsetOfCodePoint(io.airlift.slice.SliceUtf8.offsetOfCodePoint) ScalarFunction(io.prestosql.spi.function.ScalarFunction) Description(io.prestosql.spi.function.Description) LiteralParameters(io.prestosql.spi.function.LiteralParameters) SqlType(io.prestosql.spi.function.SqlType)

Aggregations

LiteralParameters (io.prestosql.spi.function.LiteralParameters)25 SqlType (io.prestosql.spi.function.SqlType)24 ScalarFunction (io.prestosql.spi.function.ScalarFunction)20 Description (io.prestosql.spi.function.Description)15 Constraint (io.prestosql.type.Constraint)14 Slice (io.airlift.slice.Slice)10 SliceUtf8.lengthOfCodePoint (io.airlift.slice.SliceUtf8.lengthOfCodePoint)8 SliceUtf8.offsetOfCodePoint (io.airlift.slice.SliceUtf8.offsetOfCodePoint)8 PrestoException (io.prestosql.spi.PrestoException)7 SqlNullable (io.prestosql.spi.function.SqlNullable)7 Matcher (io.airlift.joni.Matcher)6 BlockBuilder (io.prestosql.spi.block.BlockBuilder)5 DynamicSliceOutput (io.airlift.slice.DynamicSliceOutput)4 SliceOutput (io.airlift.slice.SliceOutput)4 JsonParser (com.fasterxml.jackson.core.JsonParser)3 Region (io.airlift.joni.Region)3 Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice)3 JsonUtil.createJsonParser (io.prestosql.util.JsonUtil.createJsonParser)3 IOException (java.io.IOException)3 Block (io.prestosql.spi.block.Block)2