Search in sources :

Example 1 with RcFileDataSource

use of com.facebook.presto.rcfile.RcFileDataSource in project presto by prestodb.

the class RcFileFileWriterFactory method createFileWriter.

@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session) {
    if (!HiveSessionProperties.isRcfileOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    } else {
        return Optional.empty();
    }
    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));
    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // and index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings().splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream().map(hiveType -> hiveType.getType(typeManager)).collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);
        Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path), fileSystem.getFileStatus(path).getLen());
                } catch (IOException e) {
                    throw Throwables.propagate(e);
                }
            });
        }
        return Optional.of(new RcFileFileWriter(outputStream, rcFileEncoding, fileColumnTypes, codecName, fileInputColumnIndexes, ImmutableMap.<String, String>builder().put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString()).put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(), validationInputFactory));
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) TypeManager(com.facebook.presto.spi.type.TypeManager) FileSystem(org.apache.hadoop.fs.FileSystem) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) META_TABLE_COLUMNS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS) Supplier(java.util.function.Supplier) Inject(javax.inject.Inject) Type(com.facebook.presto.spi.type.Type) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) Splitter(com.google.common.base.Splitter) OutputStream(java.io.OutputStream) RcFileDataSource(com.facebook.presto.rcfile.RcFileDataSource) META_TABLE_COLUMN_TYPES(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES) Properties(java.util.Properties) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) ImmutableMap(com.google.common.collect.ImmutableMap) HdfsRcFileDataSource(com.facebook.presto.hive.rcfile.HdfsRcFileDataSource) RCFileOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat) Throwables(com.google.common.base.Throwables) IOException(java.io.IOException) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) JobConf(org.apache.hadoop.mapred.JobConf) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) FileOutputFormat(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) RcFilePageSourceFactory.createTextVectorEncoding(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory.createTextVectorEncoding) Optional(java.util.Optional) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) HiveType.toHiveTypes(com.facebook.presto.hive.HiveType.toHiveTypes) OutputStream(java.io.OutputStream) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) IOException(java.io.IOException) IOException(java.io.IOException) Type(com.facebook.presto.spi.type.Type) HdfsRcFileDataSource(com.facebook.presto.hive.rcfile.HdfsRcFileDataSource) FileSystem(org.apache.hadoop.fs.FileSystem) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) Supplier(java.util.function.Supplier) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding)

Aggregations

HiveType.toHiveTypes (com.facebook.presto.hive.HiveType.toHiveTypes)1 StorageFormat (com.facebook.presto.hive.metastore.StorageFormat)1 HdfsRcFileDataSource (com.facebook.presto.hive.rcfile.HdfsRcFileDataSource)1 RcFilePageSourceFactory.createTextVectorEncoding (com.facebook.presto.hive.rcfile.RcFilePageSourceFactory.createTextVectorEncoding)1 RcFileDataSource (com.facebook.presto.rcfile.RcFileDataSource)1 RcFileEncoding (com.facebook.presto.rcfile.RcFileEncoding)1 BinaryRcFileEncoding (com.facebook.presto.rcfile.binary.BinaryRcFileEncoding)1 ConnectorSession (com.facebook.presto.spi.ConnectorSession)1 Type (com.facebook.presto.spi.type.Type)1 TypeManager (com.facebook.presto.spi.type.TypeManager)1 Splitter (com.google.common.base.Splitter)1 Throwables (com.google.common.base.Throwables)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 IOException (java.io.IOException)1 OutputStream (java.io.OutputStream)1 List (java.util.List)1 Objects.requireNonNull (java.util.Objects.requireNonNull)1 Optional (java.util.Optional)1 Properties (java.util.Properties)1 Supplier (java.util.function.Supplier)1