Search in sources :

Example 1 with HIVE_WRITER_OPEN_ERROR

use of com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR in project presto by prestodb.

the class RcFileFileWriterFactory method createFileWriter.

@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session, Optional<EncryptionInformation> encryptionInformation) {
    if (!HiveSessionProperties.isRcfileOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    } else {
        return Optional.empty();
    }
    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));
    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings().splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream().map(hiveType -> hiveType.getType(typeManager)).collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);
        Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path), fileSystem.getFileStatus(path).getLen(), stats);
                } catch (IOException e) {
                    throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        return Optional.of(new RcFileFileWriter(outputStream, rollbackAction, rcFileEncoding, fileColumnTypes, codecName, fileInputColumnIndexes, ImmutableMap.<String, String>builder().put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString()).put(MetastoreUtil.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(), validationInputFactory));
    } catch (Exception e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
    }
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) FileSystem(org.apache.hadoop.fs.FileSystem) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) HIVE_WRITE_VALIDATION_FAILED(com.facebook.presto.hive.HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED) Callable(java.util.concurrent.Callable) META_TABLE_COLUMNS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS) PrestoException(com.facebook.presto.spi.PrestoException) Supplier(java.util.function.Supplier) Inject(javax.inject.Inject) MetastoreUtil(com.facebook.presto.hive.metastore.MetastoreUtil) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) Splitter(com.google.common.base.Splitter) Type(com.facebook.presto.common.type.Type) OutputStream(java.io.OutputStream) RcFileDataSource(com.facebook.presto.rcfile.RcFileDataSource) META_TABLE_COLUMN_TYPES(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES) Properties(java.util.Properties) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) ImmutableMap(com.google.common.collect.ImmutableMap) HdfsRcFileDataSource(com.facebook.presto.hive.rcfile.HdfsRcFileDataSource) RCFileOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat) IOException(java.io.IOException) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) JobConf(org.apache.hadoop.mapred.JobConf) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) FileOutputFormat(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) RcFilePageSourceFactory.createTextVectorEncoding(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory.createTextVectorEncoding) Optional(java.util.Optional) HIVE_WRITER_OPEN_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) HiveType.toHiveTypes(com.facebook.presto.hive.HiveType.toHiveTypes) OutputStream(java.io.OutputStream) PrestoException(com.facebook.presto.spi.PrestoException) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) IOException(java.io.IOException) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) Type(com.facebook.presto.common.type.Type) HdfsRcFileDataSource(com.facebook.presto.hive.rcfile.HdfsRcFileDataSource) FileSystem(org.apache.hadoop.fs.FileSystem) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) Supplier(java.util.function.Supplier) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding)

Example 2 with HIVE_WRITER_OPEN_ERROR

use of com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR in project presto by prestodb.

the class ParquetFileWriterFactory method createFileWriter.

@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf conf, ConnectorSession session, Optional<EncryptionInformation> encryptionInformation) {
    if (!isParquetOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    if (!MapredParquetOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder().setMaxPageSize(getParquetWriterPageSize(session)).setMaxBlockSize(getParquetWriterBlockSize(session)).build();
    CompressionCodecName compressionCodecName = getCompression(conf);
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings().splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream().map(hiveType -> hiveType.getType(typeManager)).collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, conf);
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        return Optional.of(new ParquetFileWriter(fileSystem.create(path), rollbackAction, fileColumnNames, fileColumnTypes, parquetWriterOptions, fileInputColumnIndexes, compressionCodecName));
    } catch (IOException e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) DateTimeZone(org.joda.time.DateTimeZone) FileSystem(org.apache.hadoop.fs.FileSystem) Inject(com.google.inject.Inject) HiveSessionProperties.getParquetWriterPageSize(com.facebook.presto.hive.HiveSessionProperties.getParquetWriterPageSize) Callable(java.util.concurrent.Callable) META_TABLE_COLUMNS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS) PrestoException(com.facebook.presto.spi.PrestoException) HiveSessionProperties.isParquetOptimizedWriterEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetOptimizedWriterEnabled) NodeVersion(com.facebook.presto.hive.NodeVersion) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) HiveSessionProperties.getParquetWriterBlockSize(com.facebook.presto.hive.HiveSessionProperties.getParquetWriterBlockSize) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) Splitter(com.google.common.base.Splitter) Type(com.facebook.presto.common.type.Type) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) META_TABLE_COLUMN_TYPES(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES) Properties(java.util.Properties) ParquetOutputFormat(org.apache.parquet.hadoop.ParquetOutputFormat) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) HiveFileWriterFactory(com.facebook.presto.hive.HiveFileWriterFactory) ParquetWriterOptions(com.facebook.presto.parquet.writer.ParquetWriterOptions) HiveFileWriter(com.facebook.presto.hive.HiveFileWriter) IOException(java.io.IOException) ConnectorSession(com.facebook.presto.spi.ConnectorSession) JobConf(org.apache.hadoop.mapred.JobConf) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) MapredParquetOutputFormat(org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat) Optional(java.util.Optional) CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) HIVE_WRITER_OPEN_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR) HiveType.toHiveTypes(com.facebook.presto.hive.HiveType.toHiveTypes) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) Type(com.facebook.presto.common.type.Type) CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) FileSystem(org.apache.hadoop.fs.FileSystem) ParquetWriterOptions(com.facebook.presto.parquet.writer.ParquetWriterOptions)

Example 3 with HIVE_WRITER_OPEN_ERROR

use of com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR in project presto by prestodb.

the class OrcFileWriterFactory method createFileWriter.

@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session, Optional<EncryptionInformation> encryptionInformation) {
    if (!HiveSessionProperties.isOrcOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    OrcEncoding orcEncoding;
    if (OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        orcEncoding = ORC;
    } else if (com.facebook.hive.orc.OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        orcEncoding = DWRF;
    } else {
        return Optional.empty();
    }
    CompressionKind compression = getCompression(schema, configuration, orcEncoding);
    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings().splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream().map(hiveType -> hiveType.getType(typeManager)).collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        DataSink dataSink = createDataSink(session, fileSystem, path);
        Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isOrcOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), fileSystem.getFileStatus(path).getLen(), getOrcMaxMergeDistance(session), getOrcMaxBufferSize(session), getOrcStreamBufferSize(session), false, fileSystem.open(path), readStats);
                } catch (IOException e) {
                    throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        Optional<DwrfWriterEncryption> dwrfWriterEncryption = createDwrfEncryption(encryptionInformation, fileColumnNames, fileColumnTypes);
        return Optional.of(new OrcFileWriter(dataSink, rollbackAction, orcEncoding, fileColumnNames, fileColumnTypes, compression, orcFileWriterConfig.toOrcWriterOptionsBuilder().withFlushPolicy(DefaultOrcWriterFlushPolicy.builder().withStripeMinSize(getOrcOptimizedWriterMinStripeSize(session)).withStripeMaxSize(getOrcOptimizedWriterMaxStripeSize(session)).withStripeMaxRowCount(getOrcOptimizedWriterMaxStripeRows(session)).build()).withDictionaryMaxMemory(getOrcOptimizedWriterMaxDictionaryMemory(session)).withMaxStringStatisticsLimit(getOrcStringStatisticsLimit(session)).withIgnoreDictionaryRowGroupSizes(isExecutionBasedMemoryAccountingEnabled(session)).withDwrfStripeCacheEnabled(isDwrfWriterStripeCacheEnabled(session)).withDwrfStripeCacheMaxSize(getDwrfWriterStripeCacheeMaxSize(session)).build(), fileInputColumnIndexes, ImmutableMap.<String, String>builder().put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString()).put(MetastoreUtil.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(), hiveStorageTimeZone, validationInputFactory, getOrcOptimizedWriterValidateMode(session), stats, dwrfEncryptionProvider, dwrfWriterEncryption));
    } catch (IOException e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating " + orcEncoding + " file. " + e.getMessage(), e);
    }
}
Also used : HdfsOrcDataSource(com.facebook.presto.hive.orc.HdfsOrcDataSource) DateTimeZone(org.joda.time.DateTimeZone) FileSystem(org.apache.hadoop.fs.FileSystem) HiveSessionProperties.getDwrfWriterStripeCacheeMaxSize(com.facebook.presto.hive.HiveSessionProperties.getDwrfWriterStripeCacheeMaxSize) HIVE_WRITE_VALIDATION_FAILED(com.facebook.presto.hive.HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED) DataSink(com.facebook.presto.common.io.DataSink) HiveSessionProperties.getOrcOptimizedWriterMaxDictionaryMemory(com.facebook.presto.hive.HiveSessionProperties.getOrcOptimizedWriterMaxDictionaryMemory) OrcConf(org.apache.orc.OrcConf) HiveSessionProperties.getOrcOptimizedWriterMinStripeSize(com.facebook.presto.hive.HiveSessionProperties.getOrcOptimizedWriterMinStripeSize) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HiveSessionProperties.getOrcOptimizedWriterValidateMode(com.facebook.presto.hive.HiveSessionProperties.getOrcOptimizedWriterValidateMode) OrcDataSource(com.facebook.presto.orc.OrcDataSource) Splitter(com.google.common.base.Splitter) ENGLISH(java.util.Locale.ENGLISH) WriterEncryptionGroup(com.facebook.presto.orc.WriterEncryptionGroup) CRYPTO_SERVICE(com.facebook.presto.orc.metadata.KeyProvider.CRYPTO_SERVICE) HIVE_UNSUPPORTED_FORMAT(com.facebook.presto.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT) KeyProvider(com.facebook.presto.orc.metadata.KeyProvider) META_TABLE_COLUMN_TYPES(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HiveSessionProperties.isDwrfWriterStripeCacheEnabled(com.facebook.presto.hive.HiveSessionProperties.isDwrfWriterStripeCacheEnabled) HiveSessionProperties.isExecutionBasedMemoryAccountingEnabled(com.facebook.presto.hive.HiveSessionProperties.isExecutionBasedMemoryAccountingEnabled) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) DWRF(com.facebook.presto.orc.OrcEncoding.DWRF) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) ImmutableListMultimap(com.google.common.collect.ImmutableListMultimap) Optional(java.util.Optional) OrcOutputFormat(org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat) HiveType.toHiveTypes(com.facebook.presto.hive.HiveType.toHiveTypes) IntStream(java.util.stream.IntStream) HiveSessionProperties.getOrcOptimizedWriterMaxStripeSize(com.facebook.presto.hive.HiveSessionProperties.getOrcOptimizedWriterMaxStripeSize) Slice(io.airlift.slice.Slice) HiveSessionProperties.getOrcMaxMergeDistance(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxMergeDistance) Flatten(org.weakref.jmx.Flatten) Callable(java.util.concurrent.Callable) META_TABLE_COLUMNS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS) DwrfWriterEncryption(com.facebook.presto.orc.DwrfWriterEncryption) DataSinkFactory(com.facebook.presto.hive.datasink.DataSinkFactory) PrestoException(com.facebook.presto.spi.PrestoException) Supplier(java.util.function.Supplier) UNKNOWN(com.facebook.presto.orc.metadata.KeyProvider.UNKNOWN) Inject(javax.inject.Inject) MetastoreUtil(com.facebook.presto.hive.metastore.MetastoreUtil) Managed(org.weakref.jmx.Managed) TypeManager(com.facebook.presto.common.type.TypeManager) HiveSessionProperties.getOrcMaxBufferSize(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxBufferSize) Objects.requireNonNull(java.util.Objects.requireNonNull) OrcType(com.facebook.presto.orc.metadata.OrcType) OrcWriterStats(com.facebook.presto.orc.OrcWriterStats) Type(com.facebook.presto.common.type.Type) DwrfEncryptionProvider(com.facebook.presto.orc.DwrfEncryptionProvider) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) OrcEncoding(com.facebook.presto.orc.OrcEncoding) Properties(java.util.Properties) DefaultOrcWriterFlushPolicy(com.facebook.presto.orc.DefaultOrcWriterFlushPolicy) HiveSessionProperties.getOrcStreamBufferSize(com.facebook.presto.hive.HiveSessionProperties.getOrcStreamBufferSize) IOException(java.io.IOException) HiveSessionProperties.getOrcOptimizedWriterMaxStripeRows(com.facebook.presto.hive.HiveSessionProperties.getOrcOptimizedWriterMaxStripeRows) JobConf(org.apache.hadoop.mapred.JobConf) Collectors.toList(java.util.stream.Collectors.toList) HiveSessionProperties.getOrcStringStatisticsLimit(com.facebook.presto.hive.HiveSessionProperties.getOrcStringStatisticsLimit) HIVE_WRITER_OPEN_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR) DataSink(com.facebook.presto.common.io.DataSink) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) HdfsOrcDataSource(com.facebook.presto.hive.orc.HdfsOrcDataSource) PrestoException(com.facebook.presto.spi.PrestoException) OrcEncoding(com.facebook.presto.orc.OrcEncoding) IOException(java.io.IOException) OrcOutputFormat(org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat) OrcType(com.facebook.presto.orc.metadata.OrcType) Type(com.facebook.presto.common.type.Type) FileSystem(org.apache.hadoop.fs.FileSystem) DwrfWriterEncryption(com.facebook.presto.orc.DwrfWriterEncryption) Supplier(java.util.function.Supplier)

Aggregations

Type (com.facebook.presto.common.type.Type)3 TypeManager (com.facebook.presto.common.type.TypeManager)3 HIVE_WRITER_OPEN_ERROR (com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR)3 HiveType.toHiveTypes (com.facebook.presto.hive.HiveType.toHiveTypes)3 StorageFormat (com.facebook.presto.hive.metastore.StorageFormat)3 ConnectorSession (com.facebook.presto.spi.ConnectorSession)3 PrestoException (com.facebook.presto.spi.PrestoException)3 Splitter (com.google.common.base.Splitter)3 IOException (java.io.IOException)3 List (java.util.List)3 Objects.requireNonNull (java.util.Objects.requireNonNull)3 Optional (java.util.Optional)3 Properties (java.util.Properties)3 Callable (java.util.concurrent.Callable)3 Collectors.toList (java.util.stream.Collectors.toList)3 FileSystem (org.apache.hadoop.fs.FileSystem)3 Path (org.apache.hadoop.fs.Path)3 META_TABLE_COLUMNS (org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS)3 META_TABLE_COLUMN_TYPES (org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES)3 JobConf (org.apache.hadoop.mapred.JobConf)3