Search in sources :

Example 1 with RcFileCorruptionException

Use of com.facebook.presto.rcfile.RcFileCorruptionException in the project presto by prestodb.

In the class RcFilePageSourceFactory, method createPageSource.

/**
 * Creates a page source that reads one split of an RCFile.
 *
 * <p>The RCFile encoding is chosen from the SerDe recorded in {@code storage}: a binary encoding
 * for {@code LazyBinaryColumnarSerDe} and a text encoding for {@code ColumnarSerDe}. Any other
 * SerDe makes this method return {@code Optional.empty()}.
 *
 * <p>{@code effectivePredicate} and {@code encryptionInformation} are not used by this method.
 *
 * @return a page source over the requested byte range, or {@code Optional.empty()} when the
 *         storage SerDe is neither of the two handled here
 * @throws UnsupportedOperationException if {@code columns} is non-empty and every column has
 *         column type {@code AGGREGATED}
 * @throws PrestoException with {@code HIVE_BAD_DATA} if {@code fileSize} is 0 or reader creation
 *         fails with {@code RcFileCorruptionException}; with {@code HIVE_MISSING_DATA} if the
 *         failure's class is named {@code BlockMissingException}; with
 *         {@code HIVE_CANNOT_OPEN_SPLIT} if the file cannot be opened or reader creation fails
 *         for any other reason
 */
@Override
public Optional<? extends ConnectorPageSource> createPageSource(Configuration configuration, ConnectorSession session, Path path, long start, long length, long fileSize, Storage storage, SchemaTableName tableName, Map<String, String> tableParameters, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone, HiveFileContext hiveFileContext, Optional<EncryptionInformation> encryptionInformation) {
    // A scan made up solely of AGGREGATED columns cannot be served from this format.
    if (!columns.isEmpty() && columns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
        throw new UnsupportedOperationException("Partial aggregation pushdown only supported for ORC/Parquet files. " + "Table " + tableName.toString() + " has file (" + path.toString() + ") of format " + storage.getStorageFormat().getOutputFormat() + ". Set session property hive.pushdown_partial_aggregations_into_scan=false and execute query again");
    }
    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storage.getStorageFormat().getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (ColumnarSerDe.class.getName().equals(storage.getStorageFormat().getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(getHiveSchema(storage.getSerdeParameters(), tableParameters), hiveStorageTimeZone);
    } else {
        // Not a SerDe this factory handles.
        return Optional.empty();
    }
    if (fileSize == 0) {
        throw new PrestoException(HIVE_BAD_DATA, "RCFile is empty: " + path);
    }
    FSDataInputStream inputStream;
    try {
        inputStream = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration).openFile(path, hiveFileContext);
    } catch (Exception e) {
        // For these two failures the cause is wrapped without the split-specific message.
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    try {
        // Map each requested Hive column index to the type the reader should produce for it.
        ImmutableMap.Builder<Integer, Type> readColumns = ImmutableMap.builder();
        for (HiveColumnHandle column : columns) {
            readColumns.put(column.getHiveColumnIndex(), column.getHiveType().getType(typeManager));
        }
        RcFileReader rcFileReader = new RcFileReader(new HdfsRcFileDataSource(path.toString(), inputStream, fileSize, stats), rcFileEncoding, readColumns.build(), new AircompressorCodecFactory(new HadoopCodecFactory(configuration.getClassLoader())), start, length, new DataSize(8, Unit.MEGABYTE));
        return Optional.of(new RcFilePageSource(rcFileReader, columns, typeManager));
    } catch (Throwable e) {
        // No page source is returned on this path, so the stream is closed here.
        // A failure while closing must neither hide the original failure nor be lost:
        // it is attached to the original as a suppressed exception.
        try {
            inputStream.close();
        } catch (IOException | RuntimeException closeFailure) {
            // addSuppressed rejects self-suppression, so guard against the same instance.
            if (closeFailure != e) {
                e.addSuppressed(closeFailure);
            }
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof RcFileCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, message, e);
        }
        // Matched by simple class name rather than by type.
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) SERIALIZATION_LAST_COLUMN_TAKES_REST(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST) FIELD_DELIM(org.apache.hadoop.hive.serde.serdeConstants.FIELD_DELIM) SERIALIZATION_NULL_FORMAT(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_FORMAT) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) Unit(io.airlift.units.DataSize.Unit) RcFileCorruptionException(com.facebook.presto.rcfile.RcFileCorruptionException) AGGREGATED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.AGGREGATED) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ESCAPE_CHAR(org.apache.hadoop.hive.serde.serdeConstants.ESCAPE_CHAR) ImmutableMap(com.google.common.collect.ImmutableMap) HiveFileContext(com.facebook.presto.hive.HiveFileContext) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) DEFAULT_NULL_SEQUENCE(com.facebook.presto.rcfile.text.TextRcFileEncoding.DEFAULT_NULL_SEQUENCE) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveBatchPageSourceFactory(com.facebook.presto.hive.HiveBatchPageSourceFactory) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) Slice(io.airlift.slice.Slice) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) 
LazyUtils.getByte(org.apache.hadoop.hive.serde2.lazy.LazyUtils.getByte) PrestoException(com.facebook.presto.spi.PrestoException) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) HadoopCodecFactory(com.facebook.presto.rcfile.HadoopCodecFactory) Inject(javax.inject.Inject) SERIALIZATION_EXTEND_NESTING_LEVELS(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters.SERIALIZATION_EXTEND_NESTING_LEVELS) MAPKEY_DELIM(org.apache.hadoop.hive.serde.serdeConstants.MAPKEY_DELIM) SERIALIZATION_FORMAT(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT) TypeManager(com.facebook.presto.common.type.TypeManager) DEFAULT_SEPARATORS(com.facebook.presto.rcfile.text.TextRcFileEncoding.DEFAULT_SEPARATORS) Objects.requireNonNull(java.util.Objects.requireNonNull) COLLECTION_DELIM(org.apache.hadoop.hive.serde.serdeConstants.COLLECTION_DELIM) HIVE_BAD_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_BAD_DATA) Type(com.facebook.presto.common.type.Type) AircompressorCodecFactory(com.facebook.presto.rcfile.AircompressorCodecFactory) Storage(com.facebook.presto.hive.metastore.Storage) Properties(java.util.Properties) RcFileReader(com.facebook.presto.rcfile.RcFileReader) IOException(java.io.IOException) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) TextRcFileEncoding(com.facebook.presto.rcfile.text.TextRcFileEncoding) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) MetastoreUtil.getHiveSchema(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveSchema) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) TextRcFileEncoding(com.facebook.presto.rcfile.text.TextRcFileEncoding) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) 
IOException(java.io.IOException) RcFileReader(com.facebook.presto.rcfile.RcFileReader) RcFileCorruptionException(com.facebook.presto.rcfile.RcFileCorruptionException) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) ImmutableMap(com.google.common.collect.ImmutableMap) Type(com.facebook.presto.common.type.Type) RcFileCorruptionException(com.facebook.presto.rcfile.RcFileCorruptionException) HadoopCodecFactory(com.facebook.presto.rcfile.HadoopCodecFactory) DataSize(io.airlift.units.DataSize) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) AircompressorCodecFactory(com.facebook.presto.rcfile.AircompressorCodecFactory) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle)

Example 2 with RcFileCorruptionException

Use of com.facebook.presto.rcfile.RcFileCorruptionException in the project presto by prestodb.

In the class RcFilePageSource, method getNextPage.

/**
 * Reads the next batch from the RC file reader and assembles it into a page.
 *
 * @return the next page, or {@code null} once the reader reports a negative batch size
 *         (this page source is closed before returning {@code null})
 * @throws PrestoException rethrown unchanged if raised while reading; otherwise raised with
 *         {@code HIVE_BAD_DATA} for an {@code RcFileCorruptionException} and with
 *         {@code HIVE_CURSOR_ERROR} for any other {@code IOException} or {@code RuntimeException}
 */
@Override
public Page getNextPage() {
    try {
        // move on to the next page
        pageId++;
        // ask the reader for the next batch; a negative size ends the scan
        int positionCount = rcFileReader.advance();
        if (positionCount < 0) {
            close();
            return null;
        }
        completedPositions += positionCount;
        int channelCount = hiveColumnIndexes.length;
        Block[] channels = new Block[channelCount];
        for (int channel = 0; channel < channelCount; channel++) {
            // a preset constant block is repeated for every position; otherwise build the block
            Block constant = constantBlocks[channel];
            channels[channel] = (constant != null)
                    ? new RunLengthEncodedBlock(constant, positionCount)
                    : createBlock(positionCount, channel);
        }
        return new Page(positionCount, channels);
    } catch (PrestoException prestoFailure) {
        closeWithSuppression(prestoFailure);
        throw prestoFailure;
    } catch (RcFileCorruptionException corruption) {
        closeWithSuppression(corruption);
        throw new PrestoException(HIVE_BAD_DATA, format("Corrupted RC file: %s", rcFileReader.getId()), corruption);
    } catch (IOException | RuntimeException readFailure) {
        closeWithSuppression(readFailure);
        throw new PrestoException(HIVE_CURSOR_ERROR, format("Failed to read RC file: %s", rcFileReader.getId()), readFailure);
    }
}
Also used : RcFileCorruptionException(com.facebook.presto.rcfile.RcFileCorruptionException) RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) Block(com.facebook.presto.common.block.Block) LazyBlock(com.facebook.presto.common.block.LazyBlock) Page(com.facebook.presto.common.Page) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock)

Aggregations

RcFileCorruptionException (com.facebook.presto.rcfile.RcFileCorruptionException)2 PrestoException (com.facebook.presto.spi.PrestoException)2 IOException (java.io.IOException)2 Page (com.facebook.presto.common.Page)1 Block (com.facebook.presto.common.block.Block)1 LazyBlock (com.facebook.presto.common.block.LazyBlock)1 RunLengthEncodedBlock (com.facebook.presto.common.block.RunLengthEncodedBlock)1 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)1 Type (com.facebook.presto.common.type.Type)1 TypeManager (com.facebook.presto.common.type.TypeManager)1 EncryptionInformation (com.facebook.presto.hive.EncryptionInformation)1 FileFormatDataSourceStats (com.facebook.presto.hive.FileFormatDataSourceStats)1 HdfsEnvironment (com.facebook.presto.hive.HdfsEnvironment)1 HiveBatchPageSourceFactory (com.facebook.presto.hive.HiveBatchPageSourceFactory)1 HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle)1 AGGREGATED (com.facebook.presto.hive.HiveColumnHandle.ColumnType.AGGREGATED)1 HIVE_BAD_DATA (com.facebook.presto.hive.HiveErrorCode.HIVE_BAD_DATA)1 HIVE_CANNOT_OPEN_SPLIT (com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT)1 HIVE_MISSING_DATA (com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA)1 HiveFileContext (com.facebook.presto.hive.HiveFileContext)1