Search in sources :

Example 1 with NonInterestingColumnsMetadata

use of org.apache.drill.metastore.metadata.NonInterestingColumnsMetadata in project drill by apache.

In the class FilterEvaluatorUtils, the method evalFilter.

/**
 * Evaluates the given filter expression against the column statistics of a single row group
 * obtained from the supplied parquet footer metadata.
 *
 * @param expr            filter expression to evaluate
 * @param footer          parquet table metadata from which row groups metadata is derived
 * @param rowGroupIndex   position of the required row group among the row groups of the footer
 * @param options         option manager handed over when implicit columns statistics is added
 * @param fragmentContext fragment context; its function registry is passed to the matcher as well
 * @return outcome of matching the filter against the row group statistics
 */
@SuppressWarnings("RedundantTypeArguments")
public static RowsMatch evalFilter(LogicalExpression expr, MetadataBase.ParquetTableMetadataBase footer, int rowGroupIndex, OptionManager options, FragmentContext fragmentContext) {
    // Type arguments are spelled out explicitly to work around the compilation error caused by JDK-8066974
    Set<SchemaPath> referencedPaths = expr.<Set<SchemaPath>, Void, RuntimeException>accept(FilterEvaluatorUtils.FieldReferenceFinder.INSTANCE, null);
    List<SchemaPath> exprPaths = new ArrayList<>(referencedPaths);

    // Pick the requested row group out of all row groups described by the footer
    List<RowGroupMetadata> allRowGroups = new ArrayList<>(ParquetTableMetadataUtils.getRowGroupsMetadata(footer).values());
    RowGroupMetadata targetRowGroup = allRowGroups.get(rowGroupIndex);

    NonInterestingColumnsMetadata nonInterestingMeta = ParquetTableMetadataUtils.getNonInterestingColumnsMeta(footer);

    // Statistics of non-interesting columns (if any) are merged into the row group statistics map
    Map<SchemaPath, ColumnStatistics<?>> rowGroupStats = targetRowGroup.getColumnsStatistics();
    rowGroupStats.putAll(nonInterestingMeta.getColumnsStatistics());

    Map<SchemaPath, ColumnStatistics<?>> statsWithImplicitColumns = ParquetTableMetadataUtils.addImplicitColumnsStatistics(
        rowGroupStats,
        exprPaths,
        Collections.emptyList(),
        options,
        targetRowGroup.getPath(),
        true);

    return matches(
        expr,
        statsWithImplicitColumns,
        targetRowGroup.getSchema(),
        TableStatisticsKind.ROW_COUNT.getValue(targetRowGroup),
        fragmentContext,
        fragmentContext.getFunctionRegistry(),
        new HashSet<>(exprPaths));
}
Also used : ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) NonInterestingColumnsMetadata(org.apache.drill.metastore.metadata.NonInterestingColumnsMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) ArrayList(java.util.ArrayList) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata)

Example 2 with NonInterestingColumnsMetadata

use of org.apache.drill.metastore.metadata.NonInterestingColumnsMetadata in project drill by apache.

In the class MetastoreFileTableMetadataProvider, the method getNonInterestingColumnsMetadata.

/**
 * Returns the non-interesting columns metadata, building it on the first call and reusing the
 * stored instance afterwards. {@code throwIfChanged()} is invoked on every call before anything else.
 *
 * @return non-interesting columns metadata for the table schema
 */
@Override
public NonInterestingColumnsMetadata getNonInterestingColumnsMetadata() {
    throwIfChanged();
    if (nonInterestingColumnsMetadata != null) {
        // already built by an earlier call
        return nonInterestingColumnsMetadata;
    }

    TupleMetadata tableSchema = getTableMetadata().getSchema();
    List<SchemaPath> allPaths = SchemaUtil.getSchemaPaths(tableSchema);
    List<SchemaPath> interestingPaths = getInterestingColumns(allPaths);

    // every selected column gets the same single "no stats" holder for the nulls count
    List<StatisticsHolder<?>> noStats = Collections.singletonList(
        new StatisticsHolder<>(Statistic.NO_COLUMN_STATS, ColumnStatisticsKind.NULLS_COUNT));

    // populates statistics for non-interesting columns and columns for which statistics weren't collected:
    // a column is skipped only when it is interesting and is not an array
    Map<SchemaPath, ColumnStatistics<?>> statsByPath = allPaths.stream()
        .filter(path -> !(interestingPaths.contains(path)
            && !SchemaPathUtils.getColumnMetadata(path, tableSchema).isArray()))
        .collect(Collectors.toMap(
            Function.identity(),
            path -> new ColumnStatistics<>(noStats, SchemaPathUtils.getColumnMetadata(path, tableSchema).type())));

    nonInterestingColumnsMetadata = new NonInterestingColumnsMetadata(statsByPath);
    return nonInterestingColumnsMetadata;
}
Also used : ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) TableInfo(org.apache.drill.metastore.metadata.TableInfo) LoggerFactory(org.slf4j.LoggerFactory) TableMetadataProvider(org.apache.drill.metastore.metadata.TableMetadataProvider) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) DrillFileSystem(org.apache.drill.exec.store.dfs.DrillFileSystem) Function(java.util.function.Function) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) MetastoreMetadataProviderManager(org.apache.drill.exec.metastore.MetastoreMetadataProviderManager) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata) NonInterestingColumnsMetadata(org.apache.drill.metastore.metadata.NonInterestingColumnsMetadata) DrillStatsTable(org.apache.drill.exec.planner.common.DrillStatsTable) TableMetadata(org.apache.drill.metastore.metadata.TableMetadata) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) FileSelection(org.apache.drill.exec.store.dfs.FileSelection) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) SchemaPathUtils(org.apache.drill.metastore.util.SchemaPathUtils) Logger(org.slf4j.Logger) SchemaProvider(org.apache.drill.exec.record.metadata.schema.SchemaProvider) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) IOException(java.io.IOException) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) Collectors(java.util.stream.Collectors) DrillFileSystemUtil(org.apache.drill.exec.util.DrillFileSystemUtil) List(java.util.List) TableMetadataProviderBuilder(org.apache.drill.metastore.metadata.TableMetadataProviderBuilder) Statistic(org.apache.drill.metastore.statistics.Statistic) 
ParquetTableMetadataUtils(org.apache.drill.exec.store.parquet.ParquetTableMetadataUtils) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) MetadataException(org.apache.drill.exec.exception.MetadataException) SchemaUtil(org.apache.drill.exec.record.SchemaUtil) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) Collections(java.util.Collections) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) NonInterestingColumnsMetadata(org.apache.drill.metastore.metadata.NonInterestingColumnsMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata)

Example 3 with NonInterestingColumnsMetadata

use of org.apache.drill.metastore.metadata.NonInterestingColumnsMetadata in project drill by apache.

In the class ParquetTableMetadataUtils, the method getNonInterestingColumnsMeta.

/**
 * Builds the metadata of non-interesting columns from the given parquet table metadata.
 * <p>
 * Only {@code Metadata_V4.ParquetTableMetadata_v4} instances with a non-null column type info map
 * contribute entries; in every other case the returned metadata wraps an empty statistics map.
 * Each column that is not marked as interesting is registered with a single
 * {@code NULLS_COUNT} holder set to {@code Statistic.NO_COLUMN_STATS}.
 *
 * @param parquetTableMetadata the source of column metadata for non-interesting columns statistics
 * @return non-interesting columns metadata
 */
public static NonInterestingColumnsMetadata getNonInterestingColumnsMeta(MetadataBase.ParquetTableMetadataBase parquetTableMetadata) {
    Map<SchemaPath, ColumnStatistics<?>> statsByColumn = new HashMap<>();

    if (parquetTableMetadata instanceof Metadata_V4.ParquetTableMetadata_v4) {
        Metadata_V4.ParquetTableMetadata_v4 v4Metadata = (Metadata_V4.ParquetTableMetadata_v4) parquetTableMetadata;
        Map<Metadata_V4.ColumnTypeMetadata_v4.Key, Metadata_V4.ColumnTypeMetadata_v4> typeInfoByKey = v4Metadata.getColumnTypeInfoMap();

        if (typeInfoByKey != null) {
            for (Metadata_V4.ColumnTypeMetadata_v4 columnInfo : typeInfoByKey.values()) {
                if (columnInfo.isInteresting) {
                    // interesting columns are not part of this metadata
                    continue;
                }
                SchemaPath columnPath = SchemaPath.getCompoundPath(columnInfo.name);

                List<StatisticsHolder<?>> holders = new ArrayList<>();
                holders.add(new StatisticsHolder<>(Statistic.NO_COLUMN_STATS, ColumnStatisticsKind.NULLS_COUNT));

                // minor type is derived from the parquet primitive and original types of the column
                TypeProtos.MinorType minorType = ParquetReaderUtility.getMinorType(columnInfo.primitiveType, columnInfo.originalType);
                statsByColumn.put(columnPath, new ColumnStatistics<>(holders, minorType));
            }
        }
    }

    // single exit point: the map is empty unless V4 metadata supplied non-interesting columns
    return new NonInterestingColumnsMetadata(statsByColumn);
}
Also used : ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) NonInterestingColumnsMetadata(org.apache.drill.metastore.metadata.NonInterestingColumnsMetadata) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) TypeProtos(org.apache.drill.common.types.TypeProtos) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) OriginalType(org.apache.parquet.schema.OriginalType) Metadata_V4(org.apache.drill.exec.store.parquet.metadata.Metadata_V4) SchemaPath(org.apache.drill.common.expression.SchemaPath) PrimitiveType(org.apache.parquet.schema.PrimitiveType)

Aggregations

SchemaPath (org.apache.drill.common.expression.SchemaPath)3 NonInterestingColumnsMetadata (org.apache.drill.metastore.metadata.NonInterestingColumnsMetadata)3 ColumnStatistics (org.apache.drill.metastore.statistics.ColumnStatistics)3 ArrayList (java.util.ArrayList)2 StatisticsHolder (org.apache.drill.metastore.statistics.StatisticsHolder)2 IOException (java.io.IOException)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 List (java.util.List)1 Map (java.util.Map)1 Function (java.util.function.Function)1 Collectors (java.util.stream.Collectors)1 TypeProtos (org.apache.drill.common.types.TypeProtos)1 MetadataException (org.apache.drill.exec.exception.MetadataException)1 MetastoreMetadataProviderManager (org.apache.drill.exec.metastore.MetastoreMetadataProviderManager)1 DrillStatsTable (org.apache.drill.exec.planner.common.DrillStatsTable)1 SchemaUtil (org.apache.drill.exec.record.SchemaUtil)1 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)1 SchemaProvider (org.apache.drill.exec.record.metadata.schema.SchemaProvider)1