Search in sources :

Example 1 with BinaryRcFileEncoding

use of com.facebook.presto.rcfile.binary.BinaryRcFileEncoding in project presto by prestodb.

the class TestRcFileReaderManual method readValues.

private static List<Integer> readValues(Slice data, int offset, int length) throws IOException {
    if (offset < 0) {
        // adjust length to new offset
        length += offset;
        offset = 0;
    }
    if (offset + length > data.length()) {
        length = data.length() - offset;
    }
    RcFileReader reader = new RcFileReader(new SliceRcFileDataSource(data), new BinaryRcFileEncoding(), ImmutableMap.of(0, SMALLINT), new BogusRcFileCodecFactory(), offset, length, new DataSize(1, MEGABYTE));
    ImmutableList.Builder<Integer> values = ImmutableList.builder();
    while (reader.advance() >= 0) {
        Block block = reader.readBlock(0);
        for (int position = 0; position < block.getPositionCount(); position++) {
            values.add((int) SMALLINT.getLong(block, position));
        }
    }
    return values.build();
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) DataSize(io.airlift.units.DataSize) Block(com.facebook.presto.spi.block.Block) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding)

Example 2 with BinaryRcFileEncoding

use of com.facebook.presto.rcfile.binary.BinaryRcFileEncoding in project presto by prestodb.

the class RcFileFileWriterFactory method createFileWriter.

@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session) {
    if (!HiveSessionProperties.isRcfileOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    } else {
        return Optional.empty();
    }
    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));
    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // and index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings().splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream().map(hiveType -> hiveType.getType(typeManager)).collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);
        Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path), fileSystem.getFileStatus(path).getLen());
                } catch (IOException e) {
                    throw Throwables.propagate(e);
                }
            });
        }
        return Optional.of(new RcFileFileWriter(outputStream, rcFileEncoding, fileColumnTypes, codecName, fileInputColumnIndexes, ImmutableMap.<String, String>builder().put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString()).put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(), validationInputFactory));
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) TypeManager(com.facebook.presto.spi.type.TypeManager) FileSystem(org.apache.hadoop.fs.FileSystem) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) META_TABLE_COLUMNS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS) Supplier(java.util.function.Supplier) Inject(javax.inject.Inject) Type(com.facebook.presto.spi.type.Type) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) Splitter(com.google.common.base.Splitter) OutputStream(java.io.OutputStream) RcFileDataSource(com.facebook.presto.rcfile.RcFileDataSource) META_TABLE_COLUMN_TYPES(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES) Properties(java.util.Properties) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) ImmutableMap(com.google.common.collect.ImmutableMap) HdfsRcFileDataSource(com.facebook.presto.hive.rcfile.HdfsRcFileDataSource) RCFileOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat) Throwables(com.google.common.base.Throwables) IOException(java.io.IOException) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) JobConf(org.apache.hadoop.mapred.JobConf) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) FileOutputFormat(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) RcFilePageSourceFactory.createTextVectorEncoding(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory.createTextVectorEncoding) Optional(java.util.Optional) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) HiveType.toHiveTypes(com.facebook.presto.hive.HiveType.toHiveTypes) OutputStream(java.io.OutputStream) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) IOException(java.io.IOException) IOException(java.io.IOException) Type(com.facebook.presto.spi.type.Type) HdfsRcFileDataSource(com.facebook.presto.hive.rcfile.HdfsRcFileDataSource) FileSystem(org.apache.hadoop.fs.FileSystem) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) Supplier(java.util.function.Supplier) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding)

Example 3 with BinaryRcFileEncoding

use of com.facebook.presto.rcfile.binary.BinaryRcFileEncoding in project presto by prestodb.

the class RcFilePageSourceFactory method createPageSource.

@Override
public Optional<? extends ConnectorPageSource> createPageSource(Configuration configuration, ConnectorSession session, Path path, long start, long length, Properties schema, List<HiveColumnHandle> columns, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone) {
    if (!isRcfileOptimizedReaderEnabled(session)) {
        return Optional.empty();
    }
    RcFileEncoding rcFileEncoding;
    String deserializerClassName = getDeserializerClassName(schema);
    if (deserializerClassName.equals(LazyBinaryColumnarSerDe.class.getName())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (deserializerClassName.equals(ColumnarSerDe.class.getName())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    } else {
        return Optional.empty();
    }
    long size;
    FSDataInputStream inputStream;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        size = fileSystem.getFileStatus(path).getLen();
        inputStream = fileSystem.open(path);
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    try {
        ImmutableMap.Builder<Integer, Type> readColumns = ImmutableMap.builder();
        for (HiveColumnHandle column : columns) {
            readColumns.put(column.getHiveColumnIndex(), column.getHiveType().getType(typeManager));
        }
        RcFileReader rcFileReader = new RcFileReader(new HdfsRcFileDataSource(path.toString(), inputStream, size), rcFileEncoding, readColumns.build(), new AircompressorCodecFactory(new HadoopCodecFactory(configuration.getClassLoader())), start, length, new DataSize(1, Unit.MEGABYTE));
        return Optional.of(new RcFilePageSource(rcFileReader, columns, hiveStorageTimeZone, typeManager));
    } catch (Throwable e) {
        try {
            inputStream.close();
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) TextRcFileEncoding(com.facebook.presto.rcfile.text.TextRcFileEncoding) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) IOException(java.io.IOException) RcFileReader(com.facebook.presto.rcfile.RcFileReader) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) ImmutableMap(com.google.common.collect.ImmutableMap) Type(com.facebook.presto.spi.type.Type) HadoopCodecFactory(com.facebook.presto.rcfile.HadoopCodecFactory) FileSystem(org.apache.hadoop.fs.FileSystem) DataSize(io.airlift.units.DataSize) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) AircompressorCodecFactory(com.facebook.presto.rcfile.AircompressorCodecFactory) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle)

Aggregations

BinaryRcFileEncoding (com.facebook.presto.rcfile.binary.BinaryRcFileEncoding)3 RcFileEncoding (com.facebook.presto.rcfile.RcFileEncoding)2 Type (com.facebook.presto.spi.type.Type)2 ImmutableMap (com.google.common.collect.ImmutableMap)2 DataSize (io.airlift.units.DataSize)2 IOException (java.io.IOException)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 LazyBinaryColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe)2 HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle)1 HiveType.toHiveTypes (com.facebook.presto.hive.HiveType.toHiveTypes)1 StorageFormat (com.facebook.presto.hive.metastore.StorageFormat)1 HdfsRcFileDataSource (com.facebook.presto.hive.rcfile.HdfsRcFileDataSource)1 RcFilePageSourceFactory.createTextVectorEncoding (com.facebook.presto.hive.rcfile.RcFilePageSourceFactory.createTextVectorEncoding)1 AircompressorCodecFactory (com.facebook.presto.rcfile.AircompressorCodecFactory)1 HadoopCodecFactory (com.facebook.presto.rcfile.HadoopCodecFactory)1 RcFileDataSource (com.facebook.presto.rcfile.RcFileDataSource)1 RcFileReader (com.facebook.presto.rcfile.RcFileReader)1 TextRcFileEncoding (com.facebook.presto.rcfile.text.TextRcFileEncoding)1 ConnectorSession (com.facebook.presto.spi.ConnectorSession)1 PrestoException (com.facebook.presto.spi.PrestoException)1