Search in sources :

Example 1 with HdfsRcFileDataSource

use of com.facebook.presto.hive.rcfile.HdfsRcFileDataSource in project presto by prestodb.

The class RcFileFileWriterFactory, method createFileWriter.

/**
 * Creates the optimized RCFile writer for {@code path}, when this factory applies.
 *
 * <p>Returns {@link Optional#empty()} when the optimized RCFile writer is disabled for the
 * session, when the storage format's output format is not {@code RCFileOutputFormat}, or when
 * its SerDe is neither {@code LazyBinaryColumnarSerDe} nor {@code ColumnarSerDe}.
 *
 * @param path file to create
 * @param inputColumnNames names of the columns in the order the writer will receive them
 * @param storageFormat storage format used to decide whether this factory applies
 * @param schema table properties; the file column names and types are read from it
 * @param configuration job configuration; the compression codec is read from it
 * @param session session supplying the user, the query id and the writer session properties
 * @param encryptionInformation not consulted by this method
 * @return the writer, or empty if this factory does not handle the storage format
 * @throws PrestoException with {@code HIVE_WRITER_OPEN_ERROR} if the file or writer cannot be created
 */
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session, Optional<EncryptionInformation> encryptionInformation) {
    if (!HiveSessionProperties.isRcfileOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    } else {
        return Optional.empty();
    }
    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));
    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings().splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream().map(hiveType -> hiveType.getType(typeManager)).collect(toList());
    // indexOf yields -1 for a file column that is absent from inputColumnNames
    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);
        try {
            Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
            if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
                // the data source is opened lazily, only when the supplier is invoked
                validationInputFactory = Optional.of(() -> {
                    try {
                        return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path), fileSystem.getFileStatus(path).getLen(), stats);
                    } catch (IOException e) {
                        throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e);
                    }
                });
            }
            Callable<Void> rollbackAction = () -> {
                fileSystem.delete(path, false);
                return null;
            };
            return Optional.of(new RcFileFileWriter(outputStream, rollbackAction, rcFileEncoding, fileColumnTypes, codecName, fileInputColumnIndexes, ImmutableMap.<String, String>builder().put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString()).put(MetastoreUtil.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(), validationInputFactory));
        } catch (Throwable failure) {
            // the caller never receives a writer on this path, so nobody else can close the stream
            try {
                outputStream.close();
            } catch (IOException | RuntimeException closeFailure) {
                if (closeFailure != failure) {
                    failure.addSuppressed(closeFailure);
                }
            }
            throw failure;
        }
    } catch (Exception e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
    }
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) FileSystem(org.apache.hadoop.fs.FileSystem) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) HIVE_WRITE_VALIDATION_FAILED(com.facebook.presto.hive.HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED) Callable(java.util.concurrent.Callable) META_TABLE_COLUMNS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS) PrestoException(com.facebook.presto.spi.PrestoException) Supplier(java.util.function.Supplier) Inject(javax.inject.Inject) MetastoreUtil(com.facebook.presto.hive.metastore.MetastoreUtil) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) Splitter(com.google.common.base.Splitter) Type(com.facebook.presto.common.type.Type) OutputStream(java.io.OutputStream) RcFileDataSource(com.facebook.presto.rcfile.RcFileDataSource) META_TABLE_COLUMN_TYPES(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES) Properties(java.util.Properties) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) ImmutableMap(com.google.common.collect.ImmutableMap) HdfsRcFileDataSource(com.facebook.presto.hive.rcfile.HdfsRcFileDataSource) RCFileOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat) IOException(java.io.IOException) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) JobConf(org.apache.hadoop.mapred.JobConf) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) FileOutputFormat(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) RcFilePageSourceFactory.createTextVectorEncoding(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory.createTextVectorEncoding) Optional(java.util.Optional) 
HIVE_WRITER_OPEN_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) HiveType.toHiveTypes(com.facebook.presto.hive.HiveType.toHiveTypes) OutputStream(java.io.OutputStream) PrestoException(com.facebook.presto.spi.PrestoException) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) IOException(java.io.IOException) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) Type(com.facebook.presto.common.type.Type) HdfsRcFileDataSource(com.facebook.presto.hive.rcfile.HdfsRcFileDataSource) FileSystem(org.apache.hadoop.fs.FileSystem) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) Supplier(java.util.function.Supplier) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding)

Example 2 with HdfsRcFileDataSource

use of com.facebook.presto.hive.rcfile.HdfsRcFileDataSource in project presto by prestodb.

The class RcFileFileWriterFactory, method createFileWriter.

/**
 * Creates the optimized RCFile writer for {@code path}, when this factory applies.
 *
 * <p>Returns {@link Optional#empty()} when the optimized RCFile writer is disabled for the
 * session, when the storage format's output format is not {@code RCFileOutputFormat}, or when
 * its SerDe is neither {@code LazyBinaryColumnarSerDe} nor {@code ColumnarSerDe}.
 *
 * <p>Unchecked exceptions raised while creating the file or the writer propagate unchanged;
 * checked exceptions are wrapped in a {@link RuntimeException}.
 *
 * @param path file to create
 * @param inputColumnNames names of the columns in the order the writer will receive them
 * @param storageFormat storage format used to decide whether this factory applies
 * @param schema table properties; the file column names and types are read from it
 * @param configuration job configuration; the compression codec is read from it
 * @param session session supplying the user, the query id and the writer session properties
 * @return the writer, or empty if this factory does not handle the storage format
 */
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session) {
    if (!HiveSessionProperties.isRcfileOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    } else {
        return Optional.empty();
    }
    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));
    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings().splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream().map(hiveType -> hiveType.getType(typeManager)).collect(toList());
    // indexOf yields -1 for a file column that is absent from inputColumnNames
    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);
        try {
            Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
            if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
                // the data source is opened lazily, only when the supplier is invoked
                validationInputFactory = Optional.of(() -> {
                    try {
                        return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path), fileSystem.getFileStatus(path).getLen());
                    } catch (IOException e) {
                        // same outcome as the deprecated Throwables.propagate for a checked exception
                        throw new RuntimeException(e);
                    }
                });
            }
            return Optional.of(new RcFileFileWriter(outputStream, rcFileEncoding, fileColumnTypes, codecName, fileInputColumnIndexes, ImmutableMap.<String, String>builder().put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString()).put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(), validationInputFactory));
        } catch (Throwable failure) {
            // the caller never receives a writer on this path, so nobody else can close the stream
            try {
                outputStream.close();
            } catch (IOException | RuntimeException closeFailure) {
                if (closeFailure != failure) {
                    failure.addSuppressed(closeFailure);
                }
            }
            throw failure;
        }
    } catch (RuntimeException e) {
        // unchecked exceptions pass through untouched, as Throwables.propagate did
        throw e;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) TypeManager(com.facebook.presto.spi.type.TypeManager) FileSystem(org.apache.hadoop.fs.FileSystem) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) META_TABLE_COLUMNS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS) Supplier(java.util.function.Supplier) Inject(javax.inject.Inject) Type(com.facebook.presto.spi.type.Type) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) Splitter(com.google.common.base.Splitter) OutputStream(java.io.OutputStream) RcFileDataSource(com.facebook.presto.rcfile.RcFileDataSource) META_TABLE_COLUMN_TYPES(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES) Properties(java.util.Properties) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) ImmutableMap(com.google.common.collect.ImmutableMap) HdfsRcFileDataSource(com.facebook.presto.hive.rcfile.HdfsRcFileDataSource) RCFileOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat) Throwables(com.google.common.base.Throwables) IOException(java.io.IOException) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) JobConf(org.apache.hadoop.mapred.JobConf) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) FileOutputFormat(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) RcFilePageSourceFactory.createTextVectorEncoding(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory.createTextVectorEncoding) Optional(java.util.Optional) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) HiveType.toHiveTypes(com.facebook.presto.hive.HiveType.toHiveTypes) OutputStream(java.io.OutputStream) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) 
RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) IOException(java.io.IOException) IOException(java.io.IOException) Type(com.facebook.presto.spi.type.Type) HdfsRcFileDataSource(com.facebook.presto.hive.rcfile.HdfsRcFileDataSource) FileSystem(org.apache.hadoop.fs.FileSystem) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) Supplier(java.util.function.Supplier) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding)

Aggregations

HiveType.toHiveTypes (com.facebook.presto.hive.HiveType.toHiveTypes)2 StorageFormat (com.facebook.presto.hive.metastore.StorageFormat)2 HdfsRcFileDataSource (com.facebook.presto.hive.rcfile.HdfsRcFileDataSource)2 RcFilePageSourceFactory.createTextVectorEncoding (com.facebook.presto.hive.rcfile.RcFilePageSourceFactory.createTextVectorEncoding)2 RcFileDataSource (com.facebook.presto.rcfile.RcFileDataSource)2 RcFileEncoding (com.facebook.presto.rcfile.RcFileEncoding)2 BinaryRcFileEncoding (com.facebook.presto.rcfile.binary.BinaryRcFileEncoding)2 ConnectorSession (com.facebook.presto.spi.ConnectorSession)2 Splitter (com.google.common.base.Splitter)2 ImmutableMap (com.google.common.collect.ImmutableMap)2 IOException (java.io.IOException)2 OutputStream (java.io.OutputStream)2 List (java.util.List)2 Objects.requireNonNull (java.util.Objects.requireNonNull)2 Optional (java.util.Optional)2 Properties (java.util.Properties)2 Supplier (java.util.function.Supplier)2 Collectors.toList (java.util.stream.Collectors.toList)2 Inject (javax.inject.Inject)2 FileSystem (org.apache.hadoop.fs.FileSystem)2