Use of io.airlift.slice.SliceUtf8.tryGetCodePointAt in the presto project by prestodb.
The method findStringStatisticTruncationPositionForOriginalOrcWriter of the class OrcMetadataReader.
/**
 * Finds the position at which a UTF-8 string statistic should be truncated.
 *
 * <p>Walks {@code utf8} one code point at a time from position 0 and stops at
 * the first code point that is any of:
 * <ul>
 *   <li>an invalid sequence ({@code tryGetCodePointAt} yields a negative value),</li>
 *   <li>the replacement character ({@code REPLACEMENT_CHARACTER_CODE_POINT}),</li>
 *   <li>a supplementary code point ({@code >= MIN_SUPPLEMENTARY_CODE_POINT}),
 *       i.e. one that needs a surrogate pair in UTF-16.</li>
 * </ul>
 *
 * <p>NOTE(review): the original comment mentions a 0xFF byte being appended at
 * the first surrogate character; that is not done here, so it is presumably
 * the caller's responsibility. Confirm against the call site.
 *
 * @param utf8 the string statistic value to scan
 * @return the position of the first code point matching one of the stop
 *         conditions above, or the position reached once the scan has
 *         consumed the whole slice
 */
@VisibleForTesting
static int findStringStatisticTruncationPositionForOriginalOrcWriter(Slice utf8) {
    final int end = utf8.length();
    int offset = 0;
    while (offset < end) {
        final int current = tryGetCodePointAt(utf8, offset);

        // Truncate before an invalid sequence, before the first replacement
        // character, or before the first code point outside the BMP.
        if (current < 0
                || current == REPLACEMENT_CHARACTER_CODE_POINT
                || current >= MIN_SUPPLEMENTARY_CODE_POINT) {
            return offset;
        }

        offset += lengthOfCodePoint(current);
    }
    return offset;
}
Aggregations