Search in sources :

Example 1 with RcFileCorruptionException

Use of io.prestosql.rcfile.RcFileCorruptionException in the project hetu-core by openlookeng.

In the class RcFilePageSource, the method getNextPage:

/**
 * Advances the underlying RC file reader by one row batch and materializes it
 * as a {@link Page}. Once the reader signals exhaustion (a negative row count
 * from {@code advance()}), this source is closed and {@code null} is returned.
 *
 * @return the next page of data, or {@code null} when no rows remain
 * @throws PrestoException with {@code HIVE_BAD_DATA} if the file is corrupt,
 *         or {@code HIVE_CURSOR_ERROR} for any other read failure
 */
@Override
public Page getNextPage() {
    try {
        pageId++;
        // Pull the next row batch from the reader; a negative count means EOF.
        int rowCount = rcFileReader.advance();
        if (rowCount < 0) {
            close();
            return null;
        }
        Block[] columnBlocks = new Block[hiveColumnIndexes.length];
        for (int column = 0; column < columnBlocks.length; column++) {
            // Columns with a precomputed constant block are emitted as
            // run-length-encoded blocks; the rest are built from the reader.
            Block constant = constantBlocks[column];
            columnBlocks[column] = (constant != null)
                    ? new RunLengthEncodedBlock(constant, rowCount)
                    : createBlock(rowCount, column);
        }
        return new Page(rowCount, columnBlocks);
    } catch (PrestoException e) {
        // Already a Presto error: close with suppression and rethrow as-is.
        closeWithSuppression(e);
        throw e;
    } catch (RcFileCorruptionException e) {
        closeWithSuppression(e);
        throw new PrestoException(HIVE_BAD_DATA, format("Corrupted RC file: %s", rcFileReader.getId()), e);
    } catch (IOException | RuntimeException e) {
        closeWithSuppression(e);
        throw new PrestoException(HIVE_CURSOR_ERROR, format("Failed to read RC file: %s", rcFileReader.getId()), e);
    }
}
Also used : RcFileCorruptionException(io.prestosql.rcfile.RcFileCorruptionException) LazyBlock(io.prestosql.spi.block.LazyBlock) RunLengthEncodedBlock(io.prestosql.spi.block.RunLengthEncodedBlock) Block(io.prestosql.spi.block.Block) Page(io.prestosql.spi.Page) PrestoException(io.prestosql.spi.PrestoException) IOException(java.io.IOException) RunLengthEncodedBlock(io.prestosql.spi.block.RunLengthEncodedBlock)

Example 2 with RcFileCorruptionException

Use of io.prestosql.rcfile.RcFileCorruptionException in the project hetu-core by openlookeng.

In the class RcFilePageSourceFactory, the method createPageSource:

/**
 * Builds a page source for one RCFile split, or returns {@link Optional#empty()}
 * when the table's SerDe is not one of the two supported columnar encodings
 * (lazy-binary or text columnar).
 *
 * @throws PrestoException with {@code HIVE_BAD_DATA} for an empty or corrupt file,
 *         {@code HIVE_MISSING_DATA} when HDFS blocks are missing, or
 *         {@code HIVE_CANNOT_OPEN_SPLIT} for any other failure
 */
@Override
public Optional<? extends ConnectorPageSource> createPageSource(Configuration configuration, ConnectorSession session, Path path, long start, long length, long fileSize, Properties schema, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, Optional<DynamicFilterSupplier> dynamicFilters, Optional<DeleteDeltaLocations> deleteDeltaLocations, Optional<Long> startRowOffsetOfFile, Optional<List<IndexMetadata>> indexes, SplitMetadata splitMetadata, boolean splitCacheable, long dataSourceLastModifiedTime) {
    // Choose the column encoding from the table's SerDe class; any other
    // SerDe is not handled by this factory.
    String serDeClassName = getDeserializerClassName(schema);
    RcFileEncoding encoding;
    if (serDeClassName.equals(LazyBinaryColumnarSerDe.class.getName())) {
        encoding = new BinaryRcFileEncoding(timeZone);
    } else if (serDeClassName.equals(ColumnarSerDe.class.getName())) {
        encoding = createTextVectorEncoding(schema);
    } else {
        return Optional.empty();
    }

    checkArgument(!deleteDeltaLocations.isPresent(), "Delete delta is not supported");

    if (fileSize == 0) {
        throw new PrestoException(HIVE_BAD_DATA, "RCFile is empty: " + path);
    }

    // Open the file as the session user; failures here are always surfaced as
    // HIVE_CANNOT_OPEN_SPLIT, with the split detail omitted for the benign
    // "Filesystem closed" / file-not-found cases.
    FSDataInputStream input;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        input = hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.open(path));
    } catch (Exception e) {
        boolean benignFailure = nullToEmpty(e.getMessage()).trim().equals("Filesystem closed")
                || e instanceof FileNotFoundException;
        if (benignFailure) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }

    try {
        ImmutableMap.Builder<Integer, Type> columnTypes = ImmutableMap.builder();
        for (HiveColumnHandle column : columns) {
            columnTypes.put(column.getHiveColumnIndex(), column.getHiveType().getType(typeManager));
        }
        RcFileReader reader = new RcFileReader(
                new HdfsRcFileDataSource(path.toString(), input, fileSize, stats),
                encoding,
                columnTypes.build(),
                new AircompressorCodecFactory(new HadoopCodecFactory(configuration.getClassLoader())),
                start,
                length,
                new DataSize(8, Unit.MEGABYTE));
        return Optional.of(new RcFilePageSource(reader, columns, typeManager));
    } catch (Throwable e) {
        // The stream is already open: release it before translating the error.
        try {
            input.close();
        } catch (IOException ignored) {
            // best-effort close; the original failure below is what matters
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof RcFileCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, message, e);
        }
        if (e instanceof BlockMissingException) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : FileNotFoundException(java.io.FileNotFoundException) PrestoException(io.prestosql.spi.PrestoException) RcFileEncoding(io.prestosql.rcfile.RcFileEncoding) TextRcFileEncoding(io.prestosql.rcfile.text.TextRcFileEncoding) BinaryRcFileEncoding(io.prestosql.rcfile.binary.BinaryRcFileEncoding) IOException(java.io.IOException) RcFileReader(io.prestosql.rcfile.RcFileReader) RcFileCorruptionException(io.prestosql.rcfile.RcFileCorruptionException) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) PrestoException(io.prestosql.spi.PrestoException) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) ImmutableMap(com.google.common.collect.ImmutableMap) Type(io.prestosql.spi.type.Type) RcFileCorruptionException(io.prestosql.rcfile.RcFileCorruptionException) HadoopCodecFactory(io.prestosql.rcfile.HadoopCodecFactory) FileSystem(org.apache.hadoop.fs.FileSystem) DataSize(io.airlift.units.DataSize) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) BinaryRcFileEncoding(io.prestosql.rcfile.binary.BinaryRcFileEncoding) AircompressorCodecFactory(io.prestosql.rcfile.AircompressorCodecFactory) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle)

Aggregations

RcFileCorruptionException (io.prestosql.rcfile.RcFileCorruptionException)2 PrestoException (io.prestosql.spi.PrestoException)2 IOException (java.io.IOException)2 ImmutableMap (com.google.common.collect.ImmutableMap)1 DataSize (io.airlift.units.DataSize)1 HiveColumnHandle (io.prestosql.plugin.hive.HiveColumnHandle)1 AircompressorCodecFactory (io.prestosql.rcfile.AircompressorCodecFactory)1 HadoopCodecFactory (io.prestosql.rcfile.HadoopCodecFactory)1 RcFileEncoding (io.prestosql.rcfile.RcFileEncoding)1 RcFileReader (io.prestosql.rcfile.RcFileReader)1 BinaryRcFileEncoding (io.prestosql.rcfile.binary.BinaryRcFileEncoding)1 TextRcFileEncoding (io.prestosql.rcfile.text.TextRcFileEncoding)1 Page (io.prestosql.spi.Page)1 Block (io.prestosql.spi.block.Block)1 LazyBlock (io.prestosql.spi.block.LazyBlock)1 RunLengthEncodedBlock (io.prestosql.spi.block.RunLengthEncodedBlock)1 Type (io.prestosql.spi.type.Type)1 FileNotFoundException (java.io.FileNotFoundException)1 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)1 FileSystem (org.apache.hadoop.fs.FileSystem)1