Search in sources :

Example 21 with TypeManager

use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.

the class TableStatisticsMaker method makeTableStatistics.

private TableStatistics makeTableStatistics(IcebergTableHandle tableHandle, Constraint constraint) {
    if (!tableHandle.getSnapshotId().isPresent() || constraint.getSummary().isNone()) {
        return TableStatistics.empty();
    }
    TupleDomain<IcebergColumnHandle> intersection = constraint.getSummary().transform(IcebergColumnHandle.class::cast).intersect(tableHandle.getPredicate());
    if (intersection.isNone()) {
        return TableStatistics.empty();
    }
    List<Types.NestedField> columns = icebergTable.schema().columns();
    Map<Integer, Type.PrimitiveType> idToTypeMapping = columns.stream().filter(column -> column.type().isPrimitiveType()).collect(Collectors.toMap(Types.NestedField::fieldId, column -> column.type().asPrimitiveType()));
    List<PartitionField> partitionFields = icebergTable.spec().fields();
    Set<Integer> identityPartitionIds = getIdentityPartitions(icebergTable.spec()).keySet().stream().map(PartitionField::sourceId).collect(toSet());
    List<Types.NestedField> nonPartitionPrimitiveColumns = columns.stream().filter(column -> !identityPartitionIds.contains(column.fieldId()) && column.type().isPrimitiveType()).collect(toImmutableList());
    List<Type> icebergPartitionTypes = partitionTypes(partitionFields, idToTypeMapping);
    List<IcebergColumnHandle> columnHandles = getColumns(icebergTable.schema(), typeManager);
    Map<Integer, IcebergColumnHandle> idToColumnHandle = columnHandles.stream().collect(toImmutableMap(IcebergColumnHandle::getId, identity()));
    ImmutableMap.Builder<Integer, ColumnFieldDetails> idToDetailsBuilder = ImmutableMap.builder();
    for (int index = 0; index < partitionFields.size(); index++) {
        PartitionField field = partitionFields.get(index);
        Type type = icebergPartitionTypes.get(index);
        idToDetailsBuilder.put(field.sourceId(), new ColumnFieldDetails(field, idToColumnHandle.get(field.sourceId()), type, toPrestoType(type, typeManager), type.typeId().javaClass()));
    }
    Map<Integer, ColumnFieldDetails> idToDetails = idToDetailsBuilder.build();
    TableScan tableScan = icebergTable.newScan().filter(toIcebergExpression(intersection)).useSnapshot(tableHandle.getSnapshotId().get()).includeColumnStats();
    Partition summary = null;
    try (CloseableIterable<FileScanTask> fileScanTasks = tableScan.planFiles()) {
        for (FileScanTask fileScanTask : fileScanTasks) {
            DataFile dataFile = fileScanTask.file();
            if (!dataFileMatches(dataFile, constraint, idToTypeMapping, partitionFields, idToDetails)) {
                continue;
            }
            if (summary == null) {
                summary = new Partition(idToTypeMapping, nonPartitionPrimitiveColumns, dataFile.partition(), dataFile.recordCount(), dataFile.fileSizeInBytes(), toMap(idToTypeMapping, dataFile.lowerBounds()), toMap(idToTypeMapping, dataFile.upperBounds()), dataFile.nullValueCounts(), dataFile.columnSizes());
            } else {
                summary.incrementFileCount();
                summary.incrementRecordCount(dataFile.recordCount());
                summary.incrementSize(dataFile.fileSizeInBytes());
                updateSummaryMin(summary, partitionFields, toMap(idToTypeMapping, dataFile.lowerBounds()), dataFile.nullValueCounts(), dataFile.recordCount());
                updateSummaryMax(summary, partitionFields, toMap(idToTypeMapping, dataFile.upperBounds()), dataFile.nullValueCounts(), dataFile.recordCount());
                summary.updateNullCount(dataFile.nullValueCounts());
                updateColumnSizes(summary, dataFile.columnSizes());
            }
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
    if (summary == null) {
        return TableStatistics.empty();
    }
    double recordCount = summary.getRecordCount();
    TableStatistics.Builder result = TableStatistics.builder();
    result.setRowCount(Estimate.of(recordCount));
    result.setTotalSize(Estimate.of(summary.getSize()));
    for (IcebergColumnHandle columnHandle : idToColumnHandle.values()) {
        int fieldId = columnHandle.getId();
        ColumnStatistics.Builder columnBuilder = new ColumnStatistics.Builder();
        Long nullCount = summary.getNullCounts().get(fieldId);
        if (nullCount != null) {
            columnBuilder.setNullsFraction(Estimate.of(nullCount / recordCount));
        }
        if (summary.getColumnSizes() != null) {
            Long columnSize = summary.getColumnSizes().get(fieldId);
            if (columnSize != null) {
                columnBuilder.setDataSize(Estimate.of(columnSize));
            }
        }
        Object min = summary.getMinValues().get(fieldId);
        Object max = summary.getMaxValues().get(fieldId);
        if (min instanceof Number && max instanceof Number) {
            columnBuilder.setRange(Optional.of(new DoubleRange(((Number) min).doubleValue(), ((Number) max).doubleValue())));
        }
        result.setColumnStatistics(columnHandle, columnBuilder.build());
    }
    return result.build();
}
Also used : Types(org.apache.iceberg.types.Types) ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) PartitionField(org.apache.iceberg.PartitionField) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) ImmutableList(com.google.common.collect.ImmutableList) Partition.toMap(com.facebook.presto.iceberg.Partition.toMap) TypeManager(com.facebook.presto.common.type.TypeManager) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) IcebergUtil.getIdentityPartitions(com.facebook.presto.iceberg.IcebergUtil.getIdentityPartitions) FileScanTask(org.apache.iceberg.FileScanTask) DataFile(org.apache.iceberg.DataFile) ExpressionConverter.toIcebergExpression(com.facebook.presto.iceberg.ExpressionConverter.toIcebergExpression) IcebergUtil.getColumns(com.facebook.presto.iceberg.IcebergUtil.getColumns) Collectors.toSet(java.util.stream.Collectors.toSet) Comparators(org.apache.iceberg.types.Comparators) TypeConverter.toPrestoType(com.facebook.presto.iceberg.TypeConverter.toPrestoType) CloseableIterable(org.apache.iceberg.io.CloseableIterable) NullableValue(com.facebook.presto.common.predicate.NullableValue) ImmutableMap(com.google.common.collect.ImmutableMap) Table(org.apache.iceberg.Table) Predicate(java.util.function.Predicate) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) Constraint(com.facebook.presto.spi.Constraint) TableScan(org.apache.iceberg.TableScan) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) Type(org.apache.iceberg.types.Type) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Estimate(com.facebook.presto.spi.statistics.Estimate) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) Comparator(java.util.Comparator) Types(org.apache.iceberg.types.Types) UncheckedIOException(java.io.UncheckedIOException) DataFile(org.apache.iceberg.DataFile) PartitionField(org.apache.iceberg.PartitionField) ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) TableScan(org.apache.iceberg.TableScan) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Constraint(com.facebook.presto.spi.Constraint) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) TypeConverter.toPrestoType(com.facebook.presto.iceberg.TypeConverter.toPrestoType) Type(org.apache.iceberg.types.Type) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) FileScanTask(org.apache.iceberg.FileScanTask)

Example 22 with TypeManager

use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.

the class IcebergFileWriterFactory method createParquetWriter.

private IcebergFileWriter createParquetWriter(Path outputPath, Schema icebergSchema, JobConf jobConf, ConnectorSession session, HdfsContext hdfsContext) {
    List<String> fileColumnNames = icebergSchema.columns().stream().map(Types.NestedField::name).collect(toImmutableList());
    List<Type> fileColumnTypes = icebergSchema.columns().stream().map(column -> toPrestoType(column.type(), typeManager)).collect(toImmutableList());
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), outputPath, jobConf);
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(outputPath, false);
            return null;
        };
        ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder().setMaxPageSize(getParquetWriterPageSize(session)).setMaxPageSize(getParquetWriterBlockSize(session)).build();
        return new IcebergParquetFileWriter(hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.create(outputPath)), rollbackAction, fileColumnNames, fileColumnTypes, convert(icebergSchema, "table"), makeTypeMap(fileColumnTypes, fileColumnNames), parquetWriterOptions, IntStream.range(0, fileColumnNames.size()).toArray(), getCompressionCodec(session).getParquetCompressionCodec().get(), outputPath, hdfsEnvironment, hdfsContext);
    } catch (IOException e) {
        throw new PrestoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) HdfsOrcDataSource(com.facebook.presto.hive.orc.HdfsOrcDataSource) Types(org.apache.iceberg.types.Types) FileSystem(org.apache.hadoop.fs.FileSystem) DataSink(com.facebook.presto.common.io.DataSink) IcebergSessionProperties.getOrcMaxBufferSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxBufferSize) NodeVersion(com.facebook.presto.hive.NodeVersion) PRESTO_VERSION_NAME(com.facebook.presto.hive.HiveMetadata.PRESTO_VERSION_NAME) Path(org.apache.hadoop.fs.Path) HiveSessionProperties.getParquetWriterBlockSize(com.facebook.presto.hive.HiveSessionProperties.getParquetWriterBlockSize) OrcDataSource(com.facebook.presto.orc.OrcDataSource) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) HdfsContext(com.facebook.presto.hive.HdfsContext) TypeConverter.toPrestoType(com.facebook.presto.iceberg.TypeConverter.toPrestoType) ParquetSchemaUtil.convert(org.apache.iceberg.parquet.ParquetSchemaUtil.convert) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HiveDwrfEncryptionProvider(com.facebook.presto.hive.HiveDwrfEncryptionProvider) Schema(org.apache.iceberg.Schema) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) List(java.util.List) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) ICEBERG_WRITER_OPEN_ERROR(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_WRITER_OPEN_ERROR) IcebergSessionProperties.isOrcOptimizedWriterValidate(com.facebook.presto.iceberg.IcebergSessionProperties.isOrcOptimizedWriterValidate) Optional(java.util.Optional) OutputStreamDataSink(com.facebook.presto.common.io.OutputStreamDataSink) HiveSessionProperties(com.facebook.presto.hive.HiveSessionProperties) IntStream(java.util.stream.IntStream) HiveSessionProperties.getParquetWriterPageSize(com.facebook.presto.hive.HiveSessionProperties.getParquetWriterPageSize) Callable(java.util.concurrent.Callable) PrestoException(com.facebook.presto.spi.PrestoException) Supplier(java.util.function.Supplier) Inject(javax.inject.Inject) IcebergSessionProperties.getCompressionCodec(com.facebook.presto.iceberg.IcebergSessionProperties.getCompressionCodec) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) PrimitiveTypeMapBuilder.makeTypeMap(com.facebook.presto.iceberg.util.PrimitiveTypeMapBuilder.makeTypeMap) TypeConverter.toOrcType(com.facebook.presto.iceberg.TypeConverter.toOrcType) OrcWriterStats(com.facebook.presto.orc.OrcWriterStats) Type(com.facebook.presto.common.type.Type) IcebergSessionProperties.getOrcMaxMergeDistance(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance) DwrfEncryptionProvider(com.facebook.presto.orc.DwrfEncryptionProvider) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) DefaultOrcWriterFlushPolicy(com.facebook.presto.orc.DefaultOrcWriterFlushPolicy) OrcFileWriterConfig(com.facebook.presto.hive.OrcFileWriterConfig) ParquetWriterOptions(com.facebook.presto.parquet.writer.ParquetWriterOptions) IOException(java.io.IOException) UTC(org.joda.time.DateTimeZone.UTC) FileFormat(org.apache.iceberg.FileFormat) ICEBERG_WRITE_VALIDATION_FAILED(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_WRITE_VALIDATION_FAILED) JobConf(org.apache.hadoop.mapred.JobConf) IcebergSessionProperties.getOrcOptimizedWriterValidateMode(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcOptimizedWriterValidateMode) PRESTO_QUERY_ID_NAME(com.facebook.presto.hive.metastore.MetastoreUtil.PRESTO_QUERY_ID_NAME) IcebergSessionProperties.getOrcStreamBufferSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcStreamBufferSize) Types(org.apache.iceberg.types.Types) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) TypeConverter.toPrestoType(com.facebook.presto.iceberg.TypeConverter.toPrestoType) TypeConverter.toOrcType(com.facebook.presto.iceberg.TypeConverter.toOrcType) Type(com.facebook.presto.common.type.Type) FileSystem(org.apache.hadoop.fs.FileSystem) ParquetWriterOptions(com.facebook.presto.parquet.writer.ParquetWriterOptions)

Example 23 with TypeManager

use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.

the class AccumuloModule method configure.

@Override
public void configure(Binder binder) {
    // Add appender to Log4J root logger
    // create appender
    JulAppender appender = new JulAppender();
    appender.setLayout(new PatternLayout("%d %-5p %c - %m%n"));
    appender.setThreshold(Level.INFO);
    appender.activateOptions();
    org.apache.log4j.Logger.getRootLogger().addAppender(appender);
    binder.bind(TypeManager.class).toInstance(typeManager);
    binder.bind(AccumuloConnector.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloConnectorId.class).toInstance(new AccumuloConnectorId(connectorId));
    binder.bind(AccumuloMetadata.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloMetadataFactory.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloClient.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloSplitManager.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloRecordSetProvider.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloPageSinkProvider.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloHandleResolver.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloSessionProperties.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloTableProperties.class).in(Scopes.SINGLETON);
    binder.bind(ZooKeeperMetadataManager.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloTableManager.class).in(Scopes.SINGLETON);
    binder.bind(IndexLookup.class).in(Scopes.SINGLETON);
    binder.bind(ColumnCardinalityCache.class).in(Scopes.SINGLETON);
    binder.bind(Connector.class).toProvider(ConnectorProvider.class);
    configBinder(binder).bindConfig(AccumuloConfig.class);
    jsonBinder(binder).addDeserializerBinding(Type.class).to(TypeDeserializer.class);
    jsonCodecBinder(binder).bindMapJsonCodec(String.class, JsonCodec.listJsonCodec(AccumuloTable.class));
}
Also used : Connector(org.apache.accumulo.core.client.Connector) ColumnCardinalityCache(com.facebook.presto.accumulo.index.ColumnCardinalityCache) PatternLayout(org.apache.log4j.PatternLayout) AccumuloTableProperties(com.facebook.presto.accumulo.conf.AccumuloTableProperties) IndexLookup(com.facebook.presto.accumulo.index.IndexLookup) AccumuloTable(com.facebook.presto.accumulo.metadata.AccumuloTable) AccumuloPageSinkProvider(com.facebook.presto.accumulo.io.AccumuloPageSinkProvider) Type(com.facebook.presto.common.type.Type) AccumuloSessionProperties(com.facebook.presto.accumulo.conf.AccumuloSessionProperties) ZooKeeperMetadataManager(com.facebook.presto.accumulo.metadata.ZooKeeperMetadataManager) TypeManager(com.facebook.presto.common.type.TypeManager) JulAppender(org.apache.log4j.JulAppender) AccumuloRecordSetProvider(com.facebook.presto.accumulo.io.AccumuloRecordSetProvider)

Example 24 with TypeManager

use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.

the class DeltaExpressionUtils method iterateWithPartitionPruning.

/**
 * Utility method that takes an iterator of {@link AddFile}s and a predicate and returns an iterator of {@link AddFile}s
 * that satisfy the predicate (predicate evaluates to a deterministic NO)
 */
public static CloseableIterator<AddFile> iterateWithPartitionPruning(CloseableIterator<AddFile> inputIterator, TupleDomain<DeltaColumnHandle> predicate, TypeManager typeManager) {
    TupleDomain<String> partitionPredicate = extractPartitionColumnsPredicate(predicate);
    if (partitionPredicate.isAll()) {
        // there is no partition filter, return the input iterator as is.
        return inputIterator;
    }
    if (partitionPredicate.isNone()) {
        // nothing passes the partition predicate, return empty iterator
        return new CloseableIterator<AddFile>() {

            @Override
            public boolean hasNext() {
                return false;
            }

            @Override
            public AddFile next() {
                throw new NoSuchElementException();
            }

            @Override
            public void close() throws IOException {
                inputIterator.close();
            }
        };
    }
    List<DeltaColumnHandle> partitionColumns = predicate.getColumnDomains().get().stream().filter(entry -> entry.getColumn().getColumnType() == PARTITION).map(entry -> entry.getColumn()).collect(Collectors.toList());
    return new CloseableIterator<AddFile>() {

        private AddFile nextItem;

        @Override
        public boolean hasNext() {
            if (nextItem != null) {
                return true;
            }
            while (inputIterator.hasNext()) {
                AddFile nextFile = inputIterator.next();
                if (evaluatePartitionPredicate(partitionPredicate, partitionColumns, typeManager, nextFile)) {
                    nextItem = nextFile;
                    break;
                }
            }
            return nextItem != null;
        }

        @Override
        public AddFile next() {
            if (!hasNext()) {
                throw new NoSuchElementException("there are no more files");
            }
            AddFile toReturn = nextItem;
            nextItem = null;
            return toReturn;
        }

        @Override
        public void close() throws IOException {
            inputIterator.close();
        }
    };
}
Also used : PARTITION(com.facebook.presto.delta.DeltaColumnHandle.ColumnType.PARTITION) StandardTypes(com.facebook.presto.common.type.StandardTypes) Slice(io.airlift.slice.Slice) PrestoException(com.facebook.presto.spi.PrestoException) DELTA_UNSUPPORTED_COLUMN_TYPE(com.facebook.presto.delta.DeltaErrorCode.DELTA_UNSUPPORTED_COLUMN_TYPE) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) Float.parseFloat(java.lang.Float.parseFloat) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ImmutableList(com.google.common.collect.ImmutableList) CloseableIterator(io.delta.standalone.data.CloseableIterator) TypeManager(com.facebook.presto.common.type.TypeManager) Map(java.util.Map) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) NoSuchElementException(java.util.NoSuchElementException) Type(com.facebook.presto.common.type.Type) Double.parseDouble(java.lang.Double.parseDouble) ImmutableMap(com.google.common.collect.ImmutableMap) Timestamp(java.sql.Timestamp) DELTA_INVALID_PARTITION_VALUE(com.facebook.presto.delta.DeltaErrorCode.DELTA_INVALID_PARTITION_VALUE) IOException(java.io.IOException) AddFile(io.delta.standalone.actions.AddFile) Collectors(java.util.stream.Collectors) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) String.format(java.lang.String.format) Date(java.sql.Date) Double.doubleToRawLongBits(java.lang.Double.doubleToRawLongBits) List(java.util.List) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Optional(java.util.Optional) Long.parseLong(java.lang.Long.parseLong) ValueSet(com.facebook.presto.common.predicate.ValueSet) AddFile(io.delta.standalone.actions.AddFile) CloseableIterator(io.delta.standalone.data.CloseableIterator) NoSuchElementException(java.util.NoSuchElementException)

Example 25 with TypeManager

use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.

the class DeltaPageSourceProvider method createPageSource.

@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorSplit split, ConnectorTableLayoutHandle layout, List<ColumnHandle> columns, SplitContext splitContext) {
    DeltaSplit deltaSplit = (DeltaSplit) split;
    DeltaTableLayoutHandle deltaTableLayoutHandle = (DeltaTableLayoutHandle) layout;
    DeltaTableHandle deltaTableHandle = deltaTableLayoutHandle.getTable();
    HdfsContext hdfsContext = new HdfsContext(session, deltaSplit.getSchema(), deltaSplit.getTable(), deltaSplit.getFilePath(), false);
    Path filePath = new Path(deltaSplit.getFilePath());
    List<DeltaColumnHandle> deltaColumnHandles = columns.stream().map(DeltaColumnHandle.class::cast).collect(Collectors.toList());
    List<DeltaColumnHandle> regularColumnHandles = deltaColumnHandles.stream().filter(columnHandle -> columnHandle.getColumnType() != PARTITION).collect(Collectors.toList());
    ConnectorPageSource dataPageSource = createParquetPageSource(hdfsEnvironment, session.getUser(), hdfsEnvironment.getConfiguration(hdfsContext, filePath), filePath, deltaSplit.getStart(), deltaSplit.getLength(), deltaSplit.getFileSize(), regularColumnHandles, deltaTableHandle.toSchemaTableName(), getParquetMaxReadBlockSize(session), isParquetBatchReadsEnabled(session), isParquetBatchReaderVerificationEnabled(session), typeManager, deltaTableLayoutHandle.getPredicate(), fileFormatDataSourceStats, false);
    return new DeltaPageSource(deltaColumnHandles, convertPartitionValues(deltaColumnHandles, deltaSplit.getPartitionValues()), dataPageSource);
}
Also used : Path(org.apache.hadoop.fs.Path) ParquetTypeUtils.nestedColumnPath(com.facebook.presto.parquet.ParquetTypeUtils.nestedColumnPath) ColumnIOConverter.constructField(org.apache.parquet.io.ColumnIOConverter.constructField) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) DeltaColumnHandle.getPushedDownSubfield(com.facebook.presto.delta.DeltaColumnHandle.getPushedDownSubfield) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) ParquetTypeUtils.lookupColumnByName(com.facebook.presto.parquet.ParquetTypeUtils.lookupColumnByName) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Collectors.toMap(java.util.stream.Collectors.toMap) SplitContext(com.facebook.presto.spi.SplitContext) ParquetTypeUtils.getDescriptors(com.facebook.presto.parquet.ParquetTypeUtils.getDescriptors) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) DeltaColumnHandle.isPushedDownSubfield(com.facebook.presto.delta.DeltaColumnHandle.isPushedDownSubfield) RuntimeStats(com.facebook.presto.common.RuntimeStats) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) HdfsContext(com.facebook.presto.hive.HdfsContext) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ParquetDataSource(com.facebook.presto.parquet.ParquetDataSource) SUBFIELD(com.facebook.presto.delta.DeltaColumnHandle.ColumnType.SUBFIELD) GroupType(org.apache.parquet.schema.GroupType) ImmutableMap(com.google.common.collect.ImmutableMap) DELTA_MISSING_DATA(com.facebook.presto.delta.DeltaErrorCode.DELTA_MISSING_DATA) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) Collectors(java.util.stream.Collectors) ColumnIOConverter.findNestedColumnIO(org.apache.parquet.io.ColumnIOConverter.findNestedColumnIO) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) ColumnIndexFilterUtils(com.facebook.presto.parquet.reader.ColumnIndexFilterUtils) ConnectorSession(com.facebook.presto.spi.ConnectorSession) MessageType(org.apache.parquet.schema.MessageType) DataSize(io.airlift.units.DataSize) List(java.util.List) DELTA_CANNOT_OPEN_SPLIT(com.facebook.presto.delta.DeltaErrorCode.DELTA_CANNOT_OPEN_SPLIT) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ParquetTypeUtils.columnPathFromSubfield(com.facebook.presto.parquet.ParquetTypeUtils.columnPathFromSubfield) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ColumnIO(org.apache.parquet.io.ColumnIO) Optional(java.util.Optional) DELTA_PARQUET_SCHEMA_MISMATCH(com.facebook.presto.delta.DeltaErrorCode.DELTA_PARQUET_SCHEMA_MISMATCH) ParquetPageSource(com.facebook.presto.hive.parquet.ParquetPageSource) REGULAR(com.facebook.presto.delta.DeltaColumnHandle.ColumnType.REGULAR) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) DeltaSessionProperties.getParquetMaxReadBlockSize(com.facebook.presto.delta.DeltaSessionProperties.getParquetMaxReadBlockSize) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) PARTITION(com.facebook.presto.delta.DeltaColumnHandle.ColumnType.PARTITION) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) Utils(com.facebook.presto.common.Utils) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) PredicateUtils.predicateMatches(com.facebook.presto.parquet.predicate.PredicateUtils.predicateMatches) PrestoException(com.facebook.presto.spi.PrestoException) DeltaSessionProperties.isParquetBatchReaderVerificationEnabled(com.facebook.presto.delta.DeltaSessionProperties.isParquetBatchReaderVerificationEnabled) ArrayList(java.util.ArrayList) ParquetTypeUtils.getSubfieldType(com.facebook.presto.parquet.ParquetTypeUtils.getSubfieldType) Inject(javax.inject.Inject) ParquetTypeUtils.getParquetTypeByName(com.facebook.presto.parquet.ParquetTypeUtils.getParquetTypeByName) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) Predicate(com.facebook.presto.parquet.predicate.Predicate) ParquetPageSourceFactory.checkSchemaMatch(com.facebook.presto.hive.parquet.ParquetPageSourceFactory.checkSchemaMatch) DELTA_BAD_DATA(com.facebook.presto.delta.DeltaErrorCode.DELTA_BAD_DATA) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) PredicateUtils.buildPredicate(com.facebook.presto.parquet.predicate.PredicateUtils.buildPredicate) Type(com.facebook.presto.common.type.Type) ParquetTypeUtils.getColumnIO(com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO) IOException(java.io.IOException) ParquetTypeUtils.nestedColumnPath(com.facebook.presto.parquet.ParquetTypeUtils.nestedColumnPath) DeltaSessionProperties.isParquetBatchReadsEnabled(com.facebook.presto.delta.DeltaSessionProperties.isParquetBatchReadsEnabled) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) AggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext) ParquetReader(com.facebook.presto.parquet.reader.ParquetReader) PERMISSION_DENIED(com.facebook.presto.spi.StandardErrorCode.PERMISSION_DENIED) Field(com.facebook.presto.parquet.Field) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) DeltaTypeUtils.convertPartitionValue(com.facebook.presto.delta.DeltaTypeUtils.convertPartitionValue) ColumnHandle(com.facebook.presto.spi.ColumnHandle) AccessControlException(org.apache.hadoop.security.AccessControlException) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) Block(com.facebook.presto.common.block.Block) HdfsContext(com.facebook.presto.hive.HdfsContext) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource)

Aggregations

TypeManager (com.facebook.presto.common.type.TypeManager)33 List (java.util.List)28 Type (com.facebook.presto.common.type.Type)27 Optional (java.util.Optional)27 Objects.requireNonNull (java.util.Objects.requireNonNull)26 PrestoException (com.facebook.presto.spi.PrestoException)25 Map (java.util.Map)23 Path (org.apache.hadoop.fs.Path)23 ImmutableMap (com.google.common.collect.ImmutableMap)22 IOException (java.io.IOException)22 ConnectorSession (com.facebook.presto.spi.ConnectorSession)21 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)20 ImmutableList (com.google.common.collect.ImmutableList)20 String.format (java.lang.String.format)20 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)18 DateTimeZone (org.joda.time.DateTimeZone)18 SchemaTableName (com.facebook.presto.spi.SchemaTableName)17 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)16 Domain (com.facebook.presto.common.predicate.Domain)15 Set (java.util.Set)15