Use of com.facebook.presto.rcfile.binary.BinaryRcFileEncoding in project presto by prestodb.
From the class TestRcFileReaderManual, method readValues.
private static List<Integer> readValues(Slice data, int offset, int length)
        throws IOException
{
    if (offset < 0) {
        // adjust length to new offset
        length += offset;
        offset = 0;
    }
    if (offset + length > data.length()) {
        length = data.length() - offset;
    }

    RcFileReader reader = new RcFileReader(
            new SliceRcFileDataSource(data),
            new BinaryRcFileEncoding(),
            ImmutableMap.of(0, SMALLINT),
            new BogusRcFileCodecFactory(),
            offset,
            length,
            new DataSize(1, MEGABYTE));

    ImmutableList.Builder<Integer> values = ImmutableList.builder();
    while (reader.advance() >= 0) {
        Block block = reader.readBlock(0);
        for (int position = 0; position < block.getPositionCount(); position++) {
            values.add((int) SMALLINT.getLong(block, position));
        }
    }
    return values.build();
}
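The two if blocks at the top clamp the requested byte range to the bounds of the backing slice before the reader is constructed: a negative offset eats into the length, and a range running past the end of the data is truncated. A minimal standalone sketch of that clamping logic, assuming nothing beyond the arithmetic shown above (RangeClampDemo and clampRange are illustrative names, not part of Presto):

import java.util.Arrays;

public class RangeClampDemo
{
    // Clamp an (offset, length) byte range into [0, dataLength],
    // mirroring the adjustments at the top of readValues.
    static int[] clampRange(int offset, int length, int dataLength)
    {
        if (offset < 0) {
            // a negative offset shrinks the effective length
            length += offset;
            offset = 0;
        }
        if (offset + length > dataLength) {
            // truncate ranges that run past the end of the data
            length = dataLength - offset;
        }
        return new int[] {offset, length};
    }

    public static void main(String[] args)
    {
        // range starting before the data: offset -5, length 15 over 100 bytes -> [0, 10]
        System.out.println(Arrays.toString(clampRange(-5, 15, 100)));
        // range running past the end: offset 90, length 20 over 100 bytes -> [90, 10]
        System.out.println(Arrays.toString(clampRange(90, 20, 100)));
    }
}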
Use of com.facebook.presto.rcfile.binary.BinaryRcFileEncoding in project presto by prestodb.
From the class RcFileFileWriterFactory, method createFileWriter.
@Override
public Optional<HiveFileWriter> createFileWriter(
        Path path,
        List<String> inputColumnNames,
        StorageFormat storageFormat,
        Properties schema,
        JobConf configuration,
        ConnectorSession session)
{
    if (!HiveSessionProperties.isRcfileOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }

    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    }
    else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    }
    else {
        return Optional.empty();
    }

    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));

    // existing tables and partitions may have columns in a different order than the
    // writer is providing, so build an index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings()
            .splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream()
            .map(hiveType -> hiveType.getType(typeManager))
            .collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream()
            .mapToInt(inputColumnNames::indexOf)
            .toArray();

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);

        Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsRcFileDataSource(
                            path.toString(),
                            fileSystem.open(path),
                            fileSystem.getFileStatus(path).getLen());
                }
                catch (IOException e) {
                    throw Throwables.propagate(e);
                }
            });
        }

        return Optional.of(new RcFileFileWriter(
                outputStream,
                rcFileEncoding,
                fileColumnTypes,
                codecName,
                fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId())
                        .build(),
                validationInputFactory));
    }
    catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
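The comment about rearranging columns refers to fileInputColumnIndexes: for each column name recorded in the table schema, indexOf finds its position in the writer's input, and -1 marks a schema column the input does not supply. A hedged standalone sketch of just that mapping (the class and the sample column names are invented for illustration):

import java.util.Arrays;
import java.util.List;

public class ColumnIndexDemo
{
    public static void main(String[] args)
    {
        // column order recorded in the table schema (META_TABLE_COLUMNS)
        List<String> fileColumnNames = Arrays.asList("id", "name", "added_later");
        // column order the writer's input actually provides
        List<String> inputColumnNames = Arrays.asList("name", "id");

        // same stream as in createFileWriter: one index per file column,
        // -1 where the input has no matching column
        int[] fileInputColumnIndexes = fileColumnNames.stream()
                .mapToInt(inputColumnNames::indexOf)
                .toArray();

        System.out.println(Arrays.toString(fileInputColumnIndexes)); // [1, 0, -1]
    }
}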
Use of com.facebook.presto.rcfile.binary.BinaryRcFileEncoding in project presto by prestodb.
From the class RcFilePageSourceFactory, method createPageSource.
@Override
public Optional<? extends ConnectorPageSource> createPageSource(
        Configuration configuration,
        ConnectorSession session,
        Path path,
        long start,
        long length,
        Properties schema,
        List<HiveColumnHandle> columns,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        DateTimeZone hiveStorageTimeZone)
{
    if (!isRcfileOptimizedReaderEnabled(session)) {
        return Optional.empty();
    }

    RcFileEncoding rcFileEncoding;
    String deserializerClassName = getDeserializerClassName(schema);
    if (deserializerClassName.equals(LazyBinaryColumnarSerDe.class.getName())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    }
    else if (deserializerClassName.equals(ColumnarSerDe.class.getName())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    }
    else {
        return Optional.empty();
    }

    long size;
    FSDataInputStream inputStream;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        size = fileSystem.getFileStatus(path).getLen();
        inputStream = fileSystem.open(path);
    }
    catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }

    try {
        ImmutableMap.Builder<Integer, Type> readColumns = ImmutableMap.builder();
        for (HiveColumnHandle column : columns) {
            readColumns.put(column.getHiveColumnIndex(), column.getHiveType().getType(typeManager));
        }

        RcFileReader rcFileReader = new RcFileReader(
                new HdfsRcFileDataSource(path.toString(), inputStream, size),
                rcFileEncoding,
                readColumns.build(),
                new AircompressorCodecFactory(new HadoopCodecFactory(configuration.getClassLoader())),
                start,
                length,
                new DataSize(1, Unit.MEGABYTE));

        return Optional.of(new RcFilePageSource(rcFileReader, columns, hiveStorageTimeZone, typeManager));
    }
    catch (Throwable e) {
        try {
            inputStream.close();
        }
        catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
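Both the writer factory and this page source factory pick the RcFileEncoding the same way: by comparing the table's SerDe class name against the two supported columnar SerDes, falling back to Optional.empty (so another reader or writer handles the file) for anything else. A minimal sketch of that dispatch using plain strings, assuming only the Hive class names visible in the examples above (EncodingDispatchDemo and chooseEncoding are illustrative names):

import java.util.Optional;

public class EncodingDispatchDemo
{
    enum Encoding { BINARY, TEXT }

    // Mirror of the SerDe check in createFileWriter/createPageSource:
    // LazyBinaryColumnarSerDe -> binary encoding, ColumnarSerDe -> text
    // encoding, anything else -> empty (defer to another implementation)
    static Optional<Encoding> chooseEncoding(String deserializerClassName)
    {
        if ("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe".equals(deserializerClassName)) {
            return Optional.of(Encoding.BINARY);
        }
        if ("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe".equals(deserializerClassName)) {
            return Optional.of(Encoding.TEXT);
        }
        return Optional.empty();
    }

    public static void main(String[] args)
    {
        System.out.println(chooseEncoding("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe")); // Optional[TEXT]
        System.out.println(chooseEncoding("some.other.SerDe")); // Optional.empty
    }
}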