Search in sources :

Example 1 with HiveFileWriter

Usage of com.facebook.presto.hive.HiveFileWriter in the presto project by prestodb.

Class ParquetFileWriterFactory, method createFileWriter.

/**
 * Creates a {@link ParquetFileWriter} for {@code path}, or declines by returning
 * {@link Optional#empty()} when this factory does not apply.
 *
 * <p>The factory declines when the optimized Parquet writer is not enabled for the
 * session, or when the storage format's output format is not
 * {@code MapredParquetOutputFormat}.
 *
 * @param path destination of the file to create
 * @param inputColumnNames names of the incoming columns; each file column is mapped to its
 *        position in this list
 * @param storageFormat storage format whose output format class name is checked
 * @param schema table properties supplying the file column names and column types
 * @param conf job configuration used to resolve the compression codec and the file system
 * @param session session supplying the writer settings and the user
 * @param encryptionInformation not read by this method
 * @return the writer, or an empty {@code Optional} when this factory declines
 * @throws PrestoException with {@code HIVE_WRITER_OPEN_ERROR} when an {@link IOException}
 *         is raised while obtaining the file system or creating the file
 */
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf conf, ConnectorSession session, Optional<EncryptionInformation> encryptionInformation) {
    // Both guards yield the same result; short-circuiting keeps the original evaluation order.
    if (!isParquetOptimizedWriterEnabled(session)
            || !MapredParquetOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }

    ParquetWriterOptions writerOptions = ParquetWriterOptions.builder()
            .setMaxPageSize(getParquetWriterPageSize(session))
            .setMaxBlockSize(getParquetWriterBlockSize(session))
            .build();
    CompressionCodecName codec = getCompression(conf);

    // Column names are stored as a comma-separated property; blanks and empty entries are dropped.
    Splitter columnSplitter = Splitter.on(',').trimResults().omitEmptyStrings();
    List<String> fileColumnNames = columnSplitter.splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));

    String columnTypesProperty = schema.getProperty(META_TABLE_COLUMN_TYPES, "");
    List<Type> fileColumnTypes = toHiveTypes(columnTypesProperty)
            .stream()
            .map(columnHiveType -> columnHiveType.getType(typeManager))
            .collect(toList());

    // List.indexOf yields -1 for a file column that is absent from the input columns.
    int[] fileInputColumnIndexes = fileColumnNames
            .stream()
            .mapToInt(inputColumnNames::indexOf)
            .toArray();

    try {
        FileSystem targetFileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, conf);

        // Handed to the writer so it can remove the file at path.
        Callable<Void> rollbackAction = () -> {
            targetFileSystem.delete(path, false);
            return null;
        };

        HiveFileWriter writer = new ParquetFileWriter(
                targetFileSystem.create(path),
                rollbackAction,
                fileColumnNames,
                fileColumnTypes,
                writerOptions,
                fileInputColumnIndexes,
                codec);
        return Optional.of(writer);
    } catch (IOException e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) DateTimeZone(org.joda.time.DateTimeZone) FileSystem(org.apache.hadoop.fs.FileSystem) Inject(com.google.inject.Inject) HiveSessionProperties.getParquetWriterPageSize(com.facebook.presto.hive.HiveSessionProperties.getParquetWriterPageSize) Callable(java.util.concurrent.Callable) META_TABLE_COLUMNS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS) PrestoException(com.facebook.presto.spi.PrestoException) HiveSessionProperties.isParquetOptimizedWriterEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetOptimizedWriterEnabled) NodeVersion(com.facebook.presto.hive.NodeVersion) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) HiveSessionProperties.getParquetWriterBlockSize(com.facebook.presto.hive.HiveSessionProperties.getParquetWriterBlockSize) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) Splitter(com.google.common.base.Splitter) Type(com.facebook.presto.common.type.Type) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) META_TABLE_COLUMN_TYPES(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES) Properties(java.util.Properties) ParquetOutputFormat(org.apache.parquet.hadoop.ParquetOutputFormat) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) HiveFileWriterFactory(com.facebook.presto.hive.HiveFileWriterFactory) ParquetWriterOptions(com.facebook.presto.parquet.writer.ParquetWriterOptions) HiveFileWriter(com.facebook.presto.hive.HiveFileWriter) IOException(java.io.IOException) ConnectorSession(com.facebook.presto.spi.ConnectorSession) JobConf(org.apache.hadoop.mapred.JobConf) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) MapredParquetOutputFormat(org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat) Optional(java.util.Optional) 
CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) HIVE_WRITER_OPEN_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR) HiveType.toHiveTypes(com.facebook.presto.hive.HiveType.toHiveTypes) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) Type(com.facebook.presto.common.type.Type) CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) FileSystem(org.apache.hadoop.fs.FileSystem) ParquetWriterOptions(com.facebook.presto.parquet.writer.ParquetWriterOptions)

Aggregations

Type (com.facebook.presto.common.type.Type)1 TypeManager (com.facebook.presto.common.type.TypeManager)1 EncryptionInformation (com.facebook.presto.hive.EncryptionInformation)1 HdfsEnvironment (com.facebook.presto.hive.HdfsEnvironment)1 HiveClientConfig (com.facebook.presto.hive.HiveClientConfig)1 HIVE_WRITER_OPEN_ERROR (com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR)1 HiveFileWriter (com.facebook.presto.hive.HiveFileWriter)1 HiveFileWriterFactory (com.facebook.presto.hive.HiveFileWriterFactory)1 HiveSessionProperties.getParquetWriterBlockSize (com.facebook.presto.hive.HiveSessionProperties.getParquetWriterBlockSize)1 HiveSessionProperties.getParquetWriterPageSize (com.facebook.presto.hive.HiveSessionProperties.getParquetWriterPageSize)1 HiveSessionProperties.isParquetOptimizedWriterEnabled (com.facebook.presto.hive.HiveSessionProperties.isParquetOptimizedWriterEnabled)1 HiveType.toHiveTypes (com.facebook.presto.hive.HiveType.toHiveTypes)1 NodeVersion (com.facebook.presto.hive.NodeVersion)1 StorageFormat (com.facebook.presto.hive.metastore.StorageFormat)1 ParquetWriterOptions (com.facebook.presto.parquet.writer.ParquetWriterOptions)1 ConnectorSession (com.facebook.presto.spi.ConnectorSession)1 PrestoException (com.facebook.presto.spi.PrestoException)1 Splitter (com.google.common.base.Splitter)1 Inject (com.google.inject.Inject)1 IOException (java.io.IOException)1