Search in sources :

Example 1 with PARTITION

Use of com.facebook.presto.delta.DeltaColumnHandle.ColumnType.PARTITION in the presto project by prestodb.

From the class DeltaExpressionUtils, method iterateWithPartitionPruning.

/**
 * Prunes a stream of {@link AddFile}s using the partition-column predicate that
 * {@code extractPartitionColumnsPredicate} derives from the given predicate.
 * <ul>
 * <li>If that partition predicate is "all" (no partition filter), {@code inputIterator} itself is returned.</li>
 * <li>If it is "none", an empty iterator is returned; closing it closes {@code inputIterator}.</li>
 * <li>Otherwise a lazily filtering iterator is returned that yields only the files for which
 * {@code evaluatePartitionPredicate} returns {@code true}.</li>
 * </ul>
 *
 * @param inputIterator source of candidate files; closed when the returned iterator is closed
 * @param predicate predicate over Delta columns; its {@code PARTITION} columns are handed to the evaluation
 * @param typeManager passed through to {@code evaluatePartitionPredicate}
 * @return an iterator over the files that pass partition pruning
 */
public static CloseableIterator<AddFile> iterateWithPartitionPruning(CloseableIterator<AddFile> inputIterator, TupleDomain<DeltaColumnHandle> predicate, TypeManager typeManager) {
    TupleDomain<String> partitionFilter = extractPartitionColumnsPredicate(predicate);

    // No partition filter at all: nothing to prune, hand back the source untouched.
    if (partitionFilter.isAll()) {
        return inputIterator;
    }

    // The filter can never match: expose an empty view, but keep ownership of the
    // underlying iterator so that closing the view still releases it.
    if (partitionFilter.isNone()) {
        return new CloseableIterator<AddFile>() {

            @Override
            public boolean hasNext() {
                return false;
            }

            @Override
            public AddFile next() {
                throw new NoSuchElementException();
            }

            @Override
            public void close() throws IOException {
                inputIterator.close();
            }
        };
    }

    // Collect the handles of the partition columns referenced by the predicate.
    List<DeltaColumnHandle> partitionColumns = predicate.getColumnDomains().get().stream()
            .map(columnDomain -> columnDomain.getColumn())
            .filter(column -> column.getColumnType() == PARTITION)
            .collect(Collectors.toList());

    return new CloseableIterator<AddFile>() {

        // One-element lookahead: the next matching file, or null if none has been fetched yet.
        private AddFile pending;

        @Override
        public boolean hasNext() {
            if (pending == null) {
                // Pull from the source until a file passes the partition predicate
                // or the source is exhausted.
                while (inputIterator.hasNext()) {
                    AddFile candidate = inputIterator.next();
                    if (evaluatePartitionPredicate(partitionFilter, partitionColumns, typeManager, candidate)) {
                        pending = candidate;
                        break;
                    }
                }
            }
            return pending != null;
        }

        @Override
        public AddFile next() {
            if (!hasNext()) {
                throw new NoSuchElementException("there are no more files");
            }
            // Hand out the buffered file and clear the lookahead slot.
            AddFile result = pending;
            pending = null;
            return result;
        }

        @Override
        public void close() throws IOException {
            inputIterator.close();
        }
    };
}
Also used : PARTITION(com.facebook.presto.delta.DeltaColumnHandle.ColumnType.PARTITION) StandardTypes(com.facebook.presto.common.type.StandardTypes) Slice(io.airlift.slice.Slice) PrestoException(com.facebook.presto.spi.PrestoException) DELTA_UNSUPPORTED_COLUMN_TYPE(com.facebook.presto.delta.DeltaErrorCode.DELTA_UNSUPPORTED_COLUMN_TYPE) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) Float.parseFloat(java.lang.Float.parseFloat) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ImmutableList(com.google.common.collect.ImmutableList) CloseableIterator(io.delta.standalone.data.CloseableIterator) TypeManager(com.facebook.presto.common.type.TypeManager) Map(java.util.Map) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) NoSuchElementException(java.util.NoSuchElementException) Type(com.facebook.presto.common.type.Type) Double.parseDouble(java.lang.Double.parseDouble) ImmutableMap(com.google.common.collect.ImmutableMap) Timestamp(java.sql.Timestamp) DELTA_INVALID_PARTITION_VALUE(com.facebook.presto.delta.DeltaErrorCode.DELTA_INVALID_PARTITION_VALUE) IOException(java.io.IOException) AddFile(io.delta.standalone.actions.AddFile) Collectors(java.util.stream.Collectors) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) String.format(java.lang.String.format) Date(java.sql.Date) Double.doubleToRawLongBits(java.lang.Double.doubleToRawLongBits) List(java.util.List) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Optional(java.util.Optional) Long.parseLong(java.lang.Long.parseLong) ValueSet(com.facebook.presto.common.predicate.ValueSet) AddFile(io.delta.standalone.actions.AddFile) CloseableIterator(io.delta.standalone.data.CloseableIterator) NoSuchElementException(java.util.NoSuchElementException)

Example 2 with PARTITION

Use of com.facebook.presto.delta.DeltaColumnHandle.ColumnType.PARTITION in the presto project by prestodb.

From the class DeltaPageSourceProvider, method createPageSource.

/**
 * Builds the page source for one Delta split.
 * <p>
 * The requested columns are cast to {@link DeltaColumnHandle}. Those whose column type is not
 * {@code PARTITION} are passed to {@code createParquetPageSource} for the split's file. The
 * result is wrapped in a {@link DeltaPageSource} together with the full column list and the
 * split's partition values converted by {@code convertPartitionValues}.
 *
 * @param transactionHandle not used by this method
 * @param session supplies the user and the Parquet session properties
 * @param split must be a {@link DeltaSplit}
 * @param layout must be a {@link DeltaTableLayoutHandle}
 * @param columns requested columns; each must be a {@link DeltaColumnHandle}
 * @param splitContext not used by this method
 * @return a page source over the requested columns
 */
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorSplit split, ConnectorTableLayoutHandle layout, List<ColumnHandle> columns, SplitContext splitContext) {
    DeltaSplit deltaSplit = (DeltaSplit) split;
    DeltaTableLayoutHandle layoutHandle = (DeltaTableLayoutHandle) layout;
    DeltaTableHandle tableHandle = layoutHandle.getTable();
    HdfsContext context = new HdfsContext(session, deltaSplit.getSchema(), deltaSplit.getTable(), deltaSplit.getFilePath(), false);
    Path dataFile = new Path(deltaSplit.getFilePath());

    // Every requested column, in the order it was requested.
    List<DeltaColumnHandle> allColumns = columns.stream()
            .map(column -> (DeltaColumnHandle) column)
            .collect(Collectors.toList());

    // Only the non-partition columns are handed to the Parquet page source.
    List<DeltaColumnHandle> dataColumns = allColumns.stream()
            .filter(column -> !(column.getColumnType() == PARTITION))
            .collect(Collectors.toList());

    ConnectorPageSource parquetSource = createParquetPageSource(
            hdfsEnvironment,
            session.getUser(),
            hdfsEnvironment.getConfiguration(context, dataFile),
            dataFile,
            deltaSplit.getStart(),
            deltaSplit.getLength(),
            deltaSplit.getFileSize(),
            dataColumns,
            tableHandle.toSchemaTableName(),
            getParquetMaxReadBlockSize(session),
            isParquetBatchReadsEnabled(session),
            isParquetBatchReaderVerificationEnabled(session),
            typeManager,
            layoutHandle.getPredicate(),
            fileFormatDataSourceStats,
            false);

    // Combine the file-backed columns with the split's partition values.
    return new DeltaPageSource(
            allColumns,
            convertPartitionValues(allColumns, deltaSplit.getPartitionValues()),
            parquetSource);
}
Also used : Path(org.apache.hadoop.fs.Path) ParquetTypeUtils.nestedColumnPath(com.facebook.presto.parquet.ParquetTypeUtils.nestedColumnPath) ColumnIOConverter.constructField(org.apache.parquet.io.ColumnIOConverter.constructField) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) DeltaColumnHandle.getPushedDownSubfield(com.facebook.presto.delta.DeltaColumnHandle.getPushedDownSubfield) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) ParquetTypeUtils.lookupColumnByName(com.facebook.presto.parquet.ParquetTypeUtils.lookupColumnByName) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Collectors.toMap(java.util.stream.Collectors.toMap) SplitContext(com.facebook.presto.spi.SplitContext) ParquetTypeUtils.getDescriptors(com.facebook.presto.parquet.ParquetTypeUtils.getDescriptors) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) DeltaColumnHandle.isPushedDownSubfield(com.facebook.presto.delta.DeltaColumnHandle.isPushedDownSubfield) RuntimeStats(com.facebook.presto.common.RuntimeStats) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) HdfsContext(com.facebook.presto.hive.HdfsContext) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ParquetDataSource(com.facebook.presto.parquet.ParquetDataSource) SUBFIELD(com.facebook.presto.delta.DeltaColumnHandle.ColumnType.SUBFIELD) GroupType(org.apache.parquet.schema.GroupType) ImmutableMap(com.google.common.collect.ImmutableMap) DELTA_MISSING_DATA(com.facebook.presto.delta.DeltaErrorCode.DELTA_MISSING_DATA) 
ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) Collectors(java.util.stream.Collectors) ColumnIOConverter.findNestedColumnIO(org.apache.parquet.io.ColumnIOConverter.findNestedColumnIO) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) ColumnIndexFilterUtils(com.facebook.presto.parquet.reader.ColumnIndexFilterUtils) ConnectorSession(com.facebook.presto.spi.ConnectorSession) MessageType(org.apache.parquet.schema.MessageType) DataSize(io.airlift.units.DataSize) List(java.util.List) DELTA_CANNOT_OPEN_SPLIT(com.facebook.presto.delta.DeltaErrorCode.DELTA_CANNOT_OPEN_SPLIT) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ParquetTypeUtils.columnPathFromSubfield(com.facebook.presto.parquet.ParquetTypeUtils.columnPathFromSubfield) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ColumnIO(org.apache.parquet.io.ColumnIO) Optional(java.util.Optional) DELTA_PARQUET_SCHEMA_MISMATCH(com.facebook.presto.delta.DeltaErrorCode.DELTA_PARQUET_SCHEMA_MISMATCH) ParquetPageSource(com.facebook.presto.hive.parquet.ParquetPageSource) REGULAR(com.facebook.presto.delta.DeltaColumnHandle.ColumnType.REGULAR) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) DeltaSessionProperties.getParquetMaxReadBlockSize(com.facebook.presto.delta.DeltaSessionProperties.getParquetMaxReadBlockSize) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) PARTITION(com.facebook.presto.delta.DeltaColumnHandle.ColumnType.PARTITION) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) Utils(com.facebook.presto.common.Utils) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) PredicateUtils.predicateMatches(com.facebook.presto.parquet.predicate.PredicateUtils.predicateMatches) PrestoException(com.facebook.presto.spi.PrestoException) 
DeltaSessionProperties.isParquetBatchReaderVerificationEnabled(com.facebook.presto.delta.DeltaSessionProperties.isParquetBatchReaderVerificationEnabled) ArrayList(java.util.ArrayList) ParquetTypeUtils.getSubfieldType(com.facebook.presto.parquet.ParquetTypeUtils.getSubfieldType) Inject(javax.inject.Inject) ParquetTypeUtils.getParquetTypeByName(com.facebook.presto.parquet.ParquetTypeUtils.getParquetTypeByName) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) Predicate(com.facebook.presto.parquet.predicate.Predicate) ParquetPageSourceFactory.checkSchemaMatch(com.facebook.presto.hive.parquet.ParquetPageSourceFactory.checkSchemaMatch) DELTA_BAD_DATA(com.facebook.presto.delta.DeltaErrorCode.DELTA_BAD_DATA) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) PredicateUtils.buildPredicate(com.facebook.presto.parquet.predicate.PredicateUtils.buildPredicate) Type(com.facebook.presto.common.type.Type) ParquetTypeUtils.getColumnIO(com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO) IOException(java.io.IOException) ParquetTypeUtils.nestedColumnPath(com.facebook.presto.parquet.ParquetTypeUtils.nestedColumnPath) DeltaSessionProperties.isParquetBatchReadsEnabled(com.facebook.presto.delta.DeltaSessionProperties.isParquetBatchReadsEnabled) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) AggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext) ParquetReader(com.facebook.presto.parquet.reader.ParquetReader) PERMISSION_DENIED(com.facebook.presto.spi.StandardErrorCode.PERMISSION_DENIED) Field(com.facebook.presto.parquet.Field) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) 
ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) DeltaTypeUtils.convertPartitionValue(com.facebook.presto.delta.DeltaTypeUtils.convertPartitionValue) ColumnHandle(com.facebook.presto.spi.ColumnHandle) AccessControlException(org.apache.hadoop.security.AccessControlException) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) Block(com.facebook.presto.common.block.Block) HdfsContext(com.facebook.presto.hive.HdfsContext) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource)

Aggregations

Domain (com.facebook.presto.common.predicate.Domain)2 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)2 Type (com.facebook.presto.common.type.Type)2 TypeManager (com.facebook.presto.common.type.TypeManager)2 PARTITION (com.facebook.presto.delta.DeltaColumnHandle.ColumnType.PARTITION)2 ColumnHandle (com.facebook.presto.spi.ColumnHandle)2 PrestoException (com.facebook.presto.spi.PrestoException)2 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)2 ImmutableList (com.google.common.collect.ImmutableList)2 ImmutableMap (com.google.common.collect.ImmutableMap)2 IOException (java.io.IOException)2 String.format (java.lang.String.format)2 List (java.util.List)2 Map (java.util.Map)2 Optional (java.util.Optional)2 Collectors (java.util.stream.Collectors)2 RuntimeStats (com.facebook.presto.common.RuntimeStats)1 Subfield (com.facebook.presto.common.Subfield)1 Utils (com.facebook.presto.common.Utils)1 Block (com.facebook.presto.common.block.Block)1