Search in sources :

Example 1 with RcFileCorruptionException

use of io.trino.rcfile.RcFileCorruptionException in project trino by trinodb.

In class RcFilePageSource, method getNextPage:

/**
 * Produces the next page from the RC file reader.
 *
 * <p>Each column of the page is either a run-length-encoded copy of that
 * column's constant block (when one is configured) or whatever
 * {@code createBlock} returns for the column.
 *
 * @return the next page, or {@code null} once the reader reports a negative
 *         batch size (the page source is closed before returning)
 * @throws TrinoException with {@code HIVE_BAD_DATA} when the reader reports
 *         corruption, or {@code HIVE_CURSOR_ERROR} for any other I/O or
 *         runtime failure; a {@code TrinoException} raised while reading is
 *         rethrown unchanged
 */
@Override
public Page getNextPage() {
    try {
        // One more page requested from this source.
        pageId++;

        // A negative batch size is treated as "no more data".
        int positionCount = rcFileReader.advance();
        if (positionCount < 0) {
            close();
            return null;
        }

        int columnCount = hiveColumnIndexes.length;
        Block[] pageBlocks = new Block[columnCount];
        for (int column = 0; column < columnCount; column++) {
            Block constant = constantBlocks[column];
            pageBlocks[column] = (constant == null)
                    ? createBlock(positionCount, column)
                    : new RunLengthEncodedBlock(constant, positionCount);
        }
        return new Page(positionCount, pageBlocks);
    } catch (TrinoException e) {
        // Already in the engine's exception type: release resources and propagate untouched.
        closeAllSuppress(e, this);
        throw e;
    } catch (RcFileCorruptionException e) {
        closeAllSuppress(e, this);
        throw new TrinoException(HIVE_BAD_DATA, format("Corrupted RC file: %s", rcFileReader.getId()), e);
    } catch (IOException | RuntimeException e) {
        closeAllSuppress(e, this);
        throw new TrinoException(HIVE_CURSOR_ERROR, format("Failed to read RC file: %s", rcFileReader.getId()), e);
    }
}
Also used : RcFileCorruptionException(io.trino.rcfile.RcFileCorruptionException) RunLengthEncodedBlock(io.trino.spi.block.RunLengthEncodedBlock) LazyBlock(io.trino.spi.block.LazyBlock) Block(io.trino.spi.block.Block) TrinoException(io.trino.spi.TrinoException) Page(io.trino.spi.Page) IOException(java.io.IOException)

Example 2 with RcFileCorruptionException

use of io.trino.rcfile.RcFileCorruptionException in project trino by trinodb.

In class RcFilePageSourceFactory, method createPageSource:

/**
 * Creates a page source for one split of an RC file.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Choose the RC file encoding from the table's deserializer class name;
 *       any other deserializer yields {@code Optional.empty()}.</li>
 *   <li>Reject ACID splits ({@code acidInfo} must be empty).</li>
 *   <li>Open the file. When {@code estimatedFileSize} is below
 *       {@code BUFFER_SIZE} the file tail is read into memory and the stream is
 *       closed immediately; otherwise the actual file length is queried and the
 *       open stream is handed to an {@code HdfsRcFileDataSource}.</li>
 *   <li>Clamp {@code length} to the data source size; if nothing remains, the
 *       data source is closed and an {@code EmptyPageSource} is returned.</li>
 *   <li>Build the {@code RcFileReader} and wrap it in an
 *       {@code RcFilePageSource}.</li>
 * </ol>
 *
 * <p>Every path that abandons an opened stream or data source closes it first;
 * close failures are attached to the primary exception as suppressed
 * exceptions rather than replacing it.
 *
 * @return the page source (with optional column projections), or
 *         {@code Optional.empty()} when the deserializer is not handled here
 * @throws TrinoException with {@code HIVE_CANNOT_OPEN_SPLIT},
 *         {@code HIVE_BAD_DATA} or {@code HIVE_MISSING_DATA} depending on the
 *         failure; a {@code TrinoException} raised while building the reader is
 *         rethrown unchanged
 * @throws IllegalArgumentException if {@code acidInfo} is present
 */
@Override
public Optional<ReaderPageSource> createPageSource(Configuration configuration, ConnectorSession session, Path path, long start, long length, long estimatedFileSize, Properties schema, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, Optional<AcidInfo> acidInfo, OptionalInt bucketNumber, boolean originalFile, AcidTransaction transaction) {
    RcFileEncoding rcFileEncoding;
    String deserializerClassName = getDeserializerClassName(schema);
    if (deserializerClassName.equals(LazyBinaryColumnarSerDe.class.getName())) {
        rcFileEncoding = new BinaryRcFileEncoding(timeZone);
    } else if (deserializerClassName.equals(ColumnarSerDe.class.getName())) {
        rcFileEncoding = createTextVectorEncoding(schema);
    } else {
        return Optional.empty();
    }
    checkArgument(acidInfo.isEmpty(), "Acid is not supported");
    List<HiveColumnHandle> projectedReaderColumns = columns;
    Optional<ReaderColumns> readerProjections = projectBaseColumns(columns);
    if (readerProjections.isPresent()) {
        projectedReaderColumns = readerProjections.get().get().stream().map(HiveColumnHandle.class::cast).collect(toImmutableList());
    }
    RcFileDataSource dataSource;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, configuration);
        FSDataInputStream inputStream = hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.open(path));
        if (estimatedFileSize < BUFFER_SIZE.toBytes()) {
            // Handle potentially imprecise file lengths by reading the footer
            try {
                FSDataInputStreamTail fileTail = FSDataInputStreamTail.readTail(path.toString(), estimatedFileSize, inputStream, toIntExact(BUFFER_SIZE.toBytes()));
                dataSource = new MemoryRcFileDataSource(new RcFileDataSourceId(path.toString()), fileTail.getTailSlice());
            } finally {
                inputStream.close();
            }
        } else {
            try {
                long fileSize = hdfsEnvironment.doAs(session.getIdentity(), () -> fileSystem.getFileStatus(path).getLen());
                dataSource = new HdfsRcFileDataSource(path.toString(), inputStream, fileSize, stats);
            } catch (Throwable failure) {
                // The stream has not been handed to a data source yet, so nobody
                // else will close it if the size lookup or construction fails.
                try {
                    inputStream.close();
                } catch (IOException closeFailure) {
                    if (closeFailure != failure) {
                        failure.addSuppressed(closeFailure);
                    }
                }
                throw failure;
            }
        }
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    length = min(dataSource.getSize() - start, length);
    // Split may be empty now that the correct file size is known
    if (length <= 0) {
        // The data source is not passed on to any reader, so release it here.
        try {
            dataSource.close();
        } catch (IOException ignored) {
            // Best effort: nothing was read from this split, so a close failure
            // must not fail an otherwise empty result.
        }
        return Optional.of(noProjectionAdaptation(new EmptyPageSource()));
    }
    try {
        ImmutableMap.Builder<Integer, Type> readColumns = ImmutableMap.builder();
        HiveTimestampPrecision timestampPrecision = getTimestampPrecision(session);
        for (HiveColumnHandle column : projectedReaderColumns) {
            readColumns.put(column.getBaseHiveColumnIndex(), column.getHiveType().getType(typeManager, timestampPrecision));
        }
        RcFileReader rcFileReader = new RcFileReader(dataSource, rcFileEncoding, readColumns.buildOrThrow(), new AircompressorCodecFactory(new HadoopCodecFactory(configuration.getClassLoader())), start, length, BUFFER_SIZE);
        ConnectorPageSource pageSource = new RcFilePageSource(rcFileReader, projectedReaderColumns);
        return Optional.of(new ReaderPageSource(pageSource, readerProjections));
    } catch (Throwable e) {
        try {
            dataSource.close();
        } catch (IOException closeFailure) {
            // Keep the original failure as the primary error, but do not lose
            // the close failure entirely.
            if (closeFailure != e) {
                e.addSuppressed(closeFailure);
            }
        }
        if (e instanceof TrinoException) {
            throw (TrinoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof RcFileCorruptionException) {
            throw new TrinoException(HIVE_BAD_DATA, message, e);
        }
        if (e instanceof BlockMissingException) {
            throw new TrinoException(HIVE_MISSING_DATA, message, e);
        }
        throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : FileNotFoundException(java.io.FileNotFoundException) BinaryRcFileEncoding(io.trino.rcfile.binary.BinaryRcFileEncoding) TextRcFileEncoding(io.trino.rcfile.text.TextRcFileEncoding) RcFileEncoding(io.trino.rcfile.RcFileEncoding) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) RcFileCorruptionException(io.trino.rcfile.RcFileCorruptionException) EmptyPageSource(io.trino.spi.connector.EmptyPageSource) FileSystem(org.apache.hadoop.fs.FileSystem) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) RcFileDataSourceId(io.trino.rcfile.RcFileDataSourceId) HiveTimestampPrecision(io.trino.plugin.hive.HiveTimestampPrecision) IOException(java.io.IOException) FSDataInputStreamTail(io.trino.plugin.hive.util.FSDataInputStreamTail) RcFileReader(io.trino.rcfile.RcFileReader) TrinoException(io.trino.spi.TrinoException) ImmutableMap(com.google.common.collect.ImmutableMap) Type(io.trino.spi.type.Type) MemoryRcFileDataSource(io.trino.rcfile.MemoryRcFileDataSource) HadoopCodecFactory(io.trino.rcfile.HadoopCodecFactory) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ReaderPageSource(io.trino.plugin.hive.ReaderPageSource) ReaderColumns(io.trino.plugin.hive.ReaderColumns) AircompressorCodecFactory(io.trino.rcfile.AircompressorCodecFactory) RcFileDataSource(io.trino.rcfile.RcFileDataSource)

Aggregations

RcFileCorruptionException (io.trino.rcfile.RcFileCorruptionException)2 TrinoException (io.trino.spi.TrinoException)2 IOException (java.io.IOException)2 ImmutableMap (com.google.common.collect.ImmutableMap)1 HiveColumnHandle (io.trino.plugin.hive.HiveColumnHandle)1 HiveTimestampPrecision (io.trino.plugin.hive.HiveTimestampPrecision)1 ReaderColumns (io.trino.plugin.hive.ReaderColumns)1 ReaderPageSource (io.trino.plugin.hive.ReaderPageSource)1 FSDataInputStreamTail (io.trino.plugin.hive.util.FSDataInputStreamTail)1 AircompressorCodecFactory (io.trino.rcfile.AircompressorCodecFactory)1 HadoopCodecFactory (io.trino.rcfile.HadoopCodecFactory)1 MemoryRcFileDataSource (io.trino.rcfile.MemoryRcFileDataSource)1 RcFileDataSource (io.trino.rcfile.RcFileDataSource)1 RcFileDataSourceId (io.trino.rcfile.RcFileDataSourceId)1 RcFileEncoding (io.trino.rcfile.RcFileEncoding)1 RcFileReader (io.trino.rcfile.RcFileReader)1 BinaryRcFileEncoding (io.trino.rcfile.binary.BinaryRcFileEncoding)1 TextRcFileEncoding (io.trino.rcfile.text.TextRcFileEncoding)1 Page (io.trino.spi.Page)1 Block (io.trino.spi.block.Block)1