Use of io.trino.rcfile.text.TextRcFileEncoding in project trino by trinodb.
The method createTextVectorEncoding of the class RcFilePageSourceFactory.
/**
 * Builds a {@link TextRcFileEncoding} from the serialization settings found in
 * the given schema properties.
 *
 * <p>The following settings are read from {@code schema}:
 * <ul>
 *   <li>the extended-nesting flag, which selects how many default separators are generated;</li>
 *   <li>the field, collection and map-key delimiters, which override the first three separators;</li>
 *   <li>the null format, falling back to {@code DEFAULT_NULL_SEQUENCE} when absent;</li>
 *   <li>the "last column takes rest" flag;</li>
 *   <li>the escape character, which stays {@code null} when the property is absent.</li>
 * </ul>
 *
 * @param schema properties to read the serialization settings from
 * @return the text encoding configured from {@code schema}
 */
public static TextRcFileEncoding createTextVectorEncoding(Properties schema) {
    // Extended nesting is opt-in: only a case-insensitive "true" enables it,
    // any other value (or a missing property) selects the legacy level count.
    boolean extendedNesting = "true".equalsIgnoreCase(schema.getProperty(SERIALIZATION_EXTEND_NESTING_LEVELS));
    byte[] separators = getDefaultSeparators(
            extendedNesting ? TEXT_EXTENDED_NESTING_LEVELS : TEXT_LEGACY_NESTING_LEVELS);

    // The first three separators may be overridden by dedicated properties;
    // the generated default is passed to getByte as the fallback value.
    String fieldDelimiter = schema.getProperty(FIELD_DELIM, schema.getProperty(SERIALIZATION_FORMAT));
    separators[0] = getByte(fieldDelimiter, separators[0]);

    // Hive 1.x spells the collection delimiter key "colelction.delim" while
    // Hive 3.x uses "collection.delim", so the misspelled key is the fallback.
    // https://issues.apache.org/jira/browse/HIVE-16922
    String collectionDelimiter = schema.getProperty(COLLECTION_DELIM, schema.getProperty("colelction.delim"));
    separators[1] = getByte(collectionDelimiter, separators[1]);

    String mapKeyDelimiter = schema.getProperty(MAPKEY_DELIM);
    separators[2] = getByte(mapKeyDelimiter, separators[2]);

    // Null sequence: use the configured text when present, otherwise the default.
    String configuredNullFormat = schema.getProperty(SERIALIZATION_NULL_FORMAT);
    Slice nullSequence = (configuredNullFormat != null)
            ? Slices.utf8Slice(configuredNullFormat)
            : DEFAULT_NULL_SEQUENCE;

    // Last column takes rest: enabled only by a case-insensitive "true".
    boolean lastColumnTakesRest =
            "true".equalsIgnoreCase(schema.getProperty(SERIALIZATION_LAST_COLUMN_TAKES_REST));

    // Escape byte: remains null unless the escape property is set, in which
    // case a backslash is handed to getByte as the fallback value.
    Byte escapeByte = null;
    String escapeSetting = schema.getProperty(ESCAPE_CHAR);
    if (escapeSetting != null) {
        escapeByte = getByte(escapeSetting, (byte) '\\');
    }

    return new TextRcFileEncoding(nullSequence, separators, escapeByte, lastColumnTakesRest);
}
Aggregations