
Example 1 with RcFileReader

Use of io.prestosql.rcfile.RcFileReader in project hetu-core by openlookeng.

From class RcFilePageSourceFactory, method createPageSource:

@Override
public Optional<? extends ConnectorPageSource> createPageSource(
        Configuration configuration,
        ConnectorSession session,
        Path path,
        long start,
        long length,
        long fileSize,
        Properties schema,
        List<HiveColumnHandle> columns,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        Optional<DynamicFilterSupplier> dynamicFilters,
        Optional<DeleteDeltaLocations> deleteDeltaLocations,
        Optional<Long> startRowOffsetOfFile,
        Optional<List<IndexMetadata>> indexes,
        SplitMetadata splitMetadata,
        boolean splitCacheable,
        long dataSourceLastModifiedTime)
{
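    // Pick the RCFile encoding from the table's SerDe; any other SerDe is not handled by this factory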
    RcFileEncoding rcFileEncoding;
    String deserializerClassName = getDeserializerClassName(schema);
    if (deserializerClassName.equals(LazyBinaryColumnarSerDe.class.getName())) {
        rcFileEncoding = new BinaryRcFileEncoding(timeZone);
    } else if (deserializerClassName.equals(ColumnarSerDe.class.getName())) {
        rcFileEncoding = createTextVectorEncoding(schema);
    } else {
        return Optional.empty();
    }
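    // ACID delete deltas are not supported for RCFile, and an empty file is treated as bad data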
    checkArgument(!deleteDeltaLocations.isPresent(), "Delete delta is not supported");
    if (fileSize == 0) {
        throw new PrestoException(HIVE_BAD_DATA, "RCFile is empty: " + path);
    }
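    // Open the split's file through the HDFS environment, impersonating the session user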
    FSDataInputStream inputStream;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        inputStream = hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.open(path));
    } catch (Exception e) {
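        // A closed filesystem or a missing file is surfaced directly; other failures carry split details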
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    try {
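        // Map each projected column's Hive index to its Presto type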
        ImmutableMap.Builder<Integer, Type> readColumns = ImmutableMap.builder();
        for (HiveColumnHandle column : columns) {
            readColumns.put(column.getHiveColumnIndex(), column.getHiveType().getType(typeManager));
        }
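        // Build the reader for this split's byte range; compression codecs are resolved via Aircompressor with a Hadoop fallback, using an 8MB buffer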
        RcFileReader rcFileReader = new RcFileReader(
                new HdfsRcFileDataSource(path.toString(), inputStream, fileSize, stats),
                rcFileEncoding,
                readColumns.build(),
                new AircompressorCodecFactory(new HadoopCodecFactory(configuration.getClassLoader())),
                start,
                length,
                new DataSize(8, Unit.MEGABYTE));
        return Optional.of(new RcFilePageSource(rcFileReader, columns, typeManager));
    } catch (Throwable e) {
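        // Close the input stream, then translate the failure into an appropriate PrestoException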
        try {
            inputStream.close();
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof RcFileCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, message, e);
        }
        if (e instanceof BlockMissingException) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
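
The factory returns an Optional<? extends ConnectorPageSource> and reads no data itself; the caller is expected to drain the page source page by page. The helper below is a minimal consumption sketch, not part of the hetu-core sources above: the countRows name is hypothetical, and it assumes only the standard SPI types io.prestosql.spi.connector.ConnectorPageSource and io.prestosql.spi.Page plus java.io.IOException.

// Hypothetical helper (not from hetu-core): drains a page source such as the one produced by createPageSource above.
static long countRows(ConnectorPageSource pageSource) throws IOException
{
    long rows = 0;
    try (ConnectorPageSource source = pageSource) {
        while (!source.isFinished()) {
            Page page = source.getNextPage();
            if (page == null) {
                // the source may return null while more data is still pending
                continue;
            }
            // one position per row; blocks are ordered like the requested HiveColumnHandle list
            rows += page.getPositionCount();
        }
    }
    return rows;
}

Closing the page source is expected to close the underlying HdfsRcFileDataSource and its FSDataInputStream, which is why the factory closes inputStream itself only when construction of the reader fails.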
Also used: FileNotFoundException (java.io.FileNotFoundException) PrestoException (io.prestosql.spi.PrestoException) RcFileEncoding (io.prestosql.rcfile.RcFileEncoding) TextRcFileEncoding (io.prestosql.rcfile.text.TextRcFileEncoding) BinaryRcFileEncoding (io.prestosql.rcfile.binary.BinaryRcFileEncoding) IOException (java.io.IOException) RcFileReader (io.prestosql.rcfile.RcFileReader) RcFileCorruptionException (io.prestosql.rcfile.RcFileCorruptionException) BlockMissingException (org.apache.hadoop.hdfs.BlockMissingException) ImmutableMap (com.google.common.collect.ImmutableMap) Type (io.prestosql.spi.type.Type) HadoopCodecFactory (io.prestosql.rcfile.HadoopCodecFactory) FileSystem (org.apache.hadoop.fs.FileSystem) DataSize (io.airlift.units.DataSize) FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream) LazyBinaryColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) AircompressorCodecFactory (io.prestosql.rcfile.AircompressorCodecFactory) HiveColumnHandle (io.prestosql.plugin.hive.HiveColumnHandle)

Aggregations

ImmutableMap (com.google.common.collect.ImmutableMap): 1
DataSize (io.airlift.units.DataSize): 1
HiveColumnHandle (io.prestosql.plugin.hive.HiveColumnHandle): 1
AircompressorCodecFactory (io.prestosql.rcfile.AircompressorCodecFactory): 1
HadoopCodecFactory (io.prestosql.rcfile.HadoopCodecFactory): 1
RcFileCorruptionException (io.prestosql.rcfile.RcFileCorruptionException): 1
RcFileEncoding (io.prestosql.rcfile.RcFileEncoding): 1
RcFileReader (io.prestosql.rcfile.RcFileReader): 1
BinaryRcFileEncoding (io.prestosql.rcfile.binary.BinaryRcFileEncoding): 1
TextRcFileEncoding (io.prestosql.rcfile.text.TextRcFileEncoding): 1
PrestoException (io.prestosql.spi.PrestoException): 1
Type (io.prestosql.spi.type.Type): 1
FileNotFoundException (java.io.FileNotFoundException): 1
IOException (java.io.IOException): 1
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 1
FileSystem (org.apache.hadoop.fs.FileSystem): 1
BlockMissingException (org.apache.hadoop.hdfs.BlockMissingException): 1
LazyBinaryColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe): 1