Search in sources :

Example 1 with AircompressorCodecFactory

use of com.facebook.presto.rcfile.AircompressorCodecFactory in project presto by prestodb.

the class RcFilePageSourceFactory method createPageSource.

@Override
public Optional<? extends ConnectorPageSource> createPageSource(Configuration configuration, ConnectorSession session, Path path, long start, long length, long fileSize, Storage storage, SchemaTableName tableName, Map<String, String> tableParameters, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone, HiveFileContext hiveFileContext, Optional<EncryptionInformation> encryptionInformation) {
    if (!columns.isEmpty() && columns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
        throw new UnsupportedOperationException("Partial aggregation pushdown only supported for ORC/Parquet files. " + "Table " + tableName.toString() + " has file (" + path.toString() + ") of format " + storage.getStorageFormat().getOutputFormat() + ". Set session property hive.pushdown_partial_aggregations_into_scan=false and execute query again");
    }
    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storage.getStorageFormat().getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (ColumnarSerDe.class.getName().equals(storage.getStorageFormat().getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(getHiveSchema(storage.getSerdeParameters(), tableParameters), hiveStorageTimeZone);
    } else {
        return Optional.empty();
    }
    if (fileSize == 0) {
        throw new PrestoException(HIVE_BAD_DATA, "RCFile is empty: " + path);
    }
    FSDataInputStream inputStream;
    try {
        inputStream = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration).openFile(path, hiveFileContext);
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    try {
        ImmutableMap.Builder<Integer, Type> readColumns = ImmutableMap.builder();
        for (HiveColumnHandle column : columns) {
            readColumns.put(column.getHiveColumnIndex(), column.getHiveType().getType(typeManager));
        }
        RcFileReader rcFileReader = new RcFileReader(new HdfsRcFileDataSource(path.toString(), inputStream, fileSize, stats), rcFileEncoding, readColumns.build(), new AircompressorCodecFactory(new HadoopCodecFactory(configuration.getClassLoader())), start, length, new DataSize(8, Unit.MEGABYTE));
        return Optional.of(new RcFilePageSource(rcFileReader, columns, typeManager));
    } catch (Throwable e) {
        try {
            inputStream.close();
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof RcFileCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, message, e);
        }
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) SERIALIZATION_LAST_COLUMN_TAKES_REST(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST) FIELD_DELIM(org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM) SERIALIZATION_NULL_FORMAT(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_FORMAT) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) Unit(io.airlift.units.DataSize.Unit) RcFileCorruptionException(com.facebook.presto.rcfile.RcFileCorruptionException) AGGREGATED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.AGGREGATED) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ESCAPE_CHAR(org.apache.hadoop.hive.serde.serdeConstants.ESCAPE_CHAR) ImmutableMap(com.google.common.collect.ImmutableMap) HiveFileContext(com.facebook.presto.hive.HiveFileContext) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) DEFAULT_NULL_SEQUENCE(com.facebook.presto.rcfile.text.TextRcFileEncoding.DEFAULT_NULL_SEQUENCE) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveBatchPageSourceFactory(com.facebook.presto.hive.HiveBatchPageSourceFactory) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) Slice(io.airlift.slice.Slice) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) LazyUtils.getByte(org.apache.hadoop.hive.serde2.lazy.LazyUtils.getByte) PrestoException(com.facebook.presto.spi.PrestoException) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) HadoopCodecFactory(com.facebook.presto.rcfile.HadoopCodecFactory) Inject(javax.inject.Inject) SERIALIZATION_EXTEND_NESTING_LEVELS(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters.SERIALIZATION_EXTEND_NESTING_LEVELS) MAPKEY_DELIM(org.apache.hadoop.hive.serde.serdeConstants.MAPKEY_DELIM) SERIALIZATION_FORMAT(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT) TypeManager(com.facebook.presto.common.type.TypeManager) DEFAULT_SEPARATORS(com.facebook.presto.rcfile.text.TextRcFileEncoding.DEFAULT_SEPARATORS) Objects.requireNonNull(java.util.Objects.requireNonNull) COLLECTION_DELIM(org.apache.hadoop.hive.serde.serdeConstants.COLLECTION_DELIM) HIVE_BAD_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_BAD_DATA) Type(com.facebook.presto.common.type.Type) AircompressorCodecFactory(com.facebook.presto.rcfile.AircompressorCodecFactory) Storage(com.facebook.presto.hive.metastore.Storage) Properties(java.util.Properties) RcFileReader(com.facebook.presto.rcfile.RcFileReader) IOException(java.io.IOException) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) TextRcFileEncoding(com.facebook.presto.rcfile.text.TextRcFileEncoding) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) MetastoreUtil.getHiveSchema(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveSchema) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) TextRcFileEncoding(com.facebook.presto.rcfile.text.TextRcFileEncoding) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) IOException(java.io.IOException) RcFileReader(com.facebook.presto.rcfile.RcFileReader) RcFileCorruptionException(com.facebook.presto.rcfile.RcFileCorruptionException) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) ImmutableMap(com.google.common.collect.ImmutableMap) Type(com.facebook.presto.common.type.Type) RcFileCorruptionException(com.facebook.presto.rcfile.RcFileCorruptionException) HadoopCodecFactory(com.facebook.presto.rcfile.HadoopCodecFactory) DataSize(io.airlift.units.DataSize) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) AircompressorCodecFactory(com.facebook.presto.rcfile.AircompressorCodecFactory) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle)

Example 2 with AircompressorCodecFactory

use of com.facebook.presto.rcfile.AircompressorCodecFactory in project presto by prestodb.

the class RcFilePageSourceFactory method createPageSource.

@Override
public Optional<? extends ConnectorPageSource> createPageSource(Configuration configuration, ConnectorSession session, Path path, long start, long length, Properties schema, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone) {
    if (!isRcfileOptimizedReaderEnabled(session)) {
        return Optional.empty();
    }
    RcFileEncoding rcFileEncoding;
    String deserializerClassName = getDeserializerClassName(schema);
    if (deserializerClassName.equals(LazyBinaryColumnarSerDe.class.getName())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (deserializerClassName.equals(ColumnarSerDe.class.getName())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    } else {
        return Optional.empty();
    }
    long size;
    FSDataInputStream inputStream;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        size = fileSystem.getFileStatus(path).getLen();
        inputStream = fileSystem.open(path);
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    try {
        ImmutableMap.Builder<Integer, Type> readColumns = ImmutableMap.builder();
        for (HiveColumnHandle column : columns) {
            readColumns.put(column.getHiveColumnIndex(), column.getHiveType().getType(typeManager));
        }
        RcFileReader rcFileReader = new RcFileReader(new HdfsRcFileDataSource(path.toString(), inputStream, size), rcFileEncoding, readColumns.build(), new AircompressorCodecFactory(new HadoopCodecFactory(configuration.getClassLoader())), start, length, new DataSize(1, Unit.MEGABYTE));
        return Optional.of(new RcFilePageSource(rcFileReader, columns, hiveStorageTimeZone, typeManager));
    } catch (Throwable e) {
        try {
            inputStream.close();
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) TextRcFileEncoding(com.facebook.presto.rcfile.text.TextRcFileEncoding) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) IOException(java.io.IOException) RcFileReader(com.facebook.presto.rcfile.RcFileReader) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) ImmutableMap(com.google.common.collect.ImmutableMap) Type(com.facebook.presto.spi.type.Type) HadoopCodecFactory(com.facebook.presto.rcfile.HadoopCodecFactory) FileSystem(org.apache.hadoop.fs.FileSystem) DataSize(io.airlift.units.DataSize) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) AircompressorCodecFactory(com.facebook.presto.rcfile.AircompressorCodecFactory) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle)

Aggregations

HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle)2 AircompressorCodecFactory (com.facebook.presto.rcfile.AircompressorCodecFactory)2 HadoopCodecFactory (com.facebook.presto.rcfile.HadoopCodecFactory)2 RcFileEncoding (com.facebook.presto.rcfile.RcFileEncoding)2 RcFileReader (com.facebook.presto.rcfile.RcFileReader)2 BinaryRcFileEncoding (com.facebook.presto.rcfile.binary.BinaryRcFileEncoding)2 TextRcFileEncoding (com.facebook.presto.rcfile.text.TextRcFileEncoding)2 PrestoException (com.facebook.presto.spi.PrestoException)2 ImmutableMap (com.google.common.collect.ImmutableMap)2 DataSize (io.airlift.units.DataSize)2 FileNotFoundException (java.io.FileNotFoundException)2 IOException (java.io.IOException)2 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)1 Type (com.facebook.presto.common.type.Type)1 TypeManager (com.facebook.presto.common.type.TypeManager)1 EncryptionInformation (com.facebook.presto.hive.EncryptionInformation)1 FileFormatDataSourceStats (com.facebook.presto.hive.FileFormatDataSourceStats)1 HdfsEnvironment (com.facebook.presto.hive.HdfsEnvironment)1 HiveBatchPageSourceFactory (com.facebook.presto.hive.HiveBatchPageSourceFactory)1 AGGREGATED (com.facebook.presto.hive.HiveColumnHandle.ColumnType.AGGREGATED)1