Use of org.apache.drill.metastore.metadata.NonInterestingColumnsMetadata in the Apache Drill project.
Class FilterEvaluatorUtils, method evalFilter.
/**
 * Evaluates a filter expression against the column statistics of one row group.
 *
 * <p>The statistics used for evaluation are the row group's own column statistics, merged with
 * the statistics of non-interesting columns taken from the footer, and then passed through
 * {@code ParquetTableMetadataUtils.addImplicitColumnsStatistics}.
 *
 * <p>Note: the non-interesting column statistics are merged in place into the map returned by
 * {@code rowGroupMetadata.getColumnsStatistics()}.
 *
 * @param expr            filter expression to evaluate
 * @param footer          parquet table metadata from which the row groups metadata and the
 *                        non-interesting columns metadata are obtained
 * @param rowGroupIndex   position of the row group within the values of the row groups metadata
 *                        obtained from {@code footer}
 * @param options         option manager forwarded to the implicit columns statistics helper
 * @param fragmentContext fragment context; also supplies the function registry used for matching
 * @return the value produced by {@code matches} for the given expression and statistics
 */
@SuppressWarnings("RedundantTypeArguments")
public static RowsMatch evalFilter(LogicalExpression expr, MetadataBase.ParquetTableMetadataBase footer, int rowGroupIndex, OptionManager options, FragmentContext fragmentContext) {
  // Specifies type arguments explicitly to avoid compilation error caused by JDK-8066974
  Set<SchemaPath> referencedPaths =
      expr.<Set<SchemaPath>, Void, RuntimeException>accept(FilterEvaluatorUtils.FieldReferenceFinder.INSTANCE, null);
  List<SchemaPath> schemaPathsInExpr = new ArrayList<>(referencedPaths);

  // Pick the requested row group out of all row groups described by the footer
  List<RowGroupMetadata> allRowGroups =
      new ArrayList<>(ParquetTableMetadataUtils.getRowGroupsMetadata(footer).values());
  RowGroupMetadata targetRowGroup = allRowGroups.get(rowGroupIndex);

  NonInterestingColumnsMetadata nonInterestingMeta = ParquetTableMetadataUtils.getNonInterestingColumnsMeta(footer);

  Map<SchemaPath, ColumnStatistics<?>> statsByColumn = targetRowGroup.getColumnsStatistics();
  // Add column statistics of non-interesting columns if there are any
  statsByColumn.putAll(nonInterestingMeta.getColumnsStatistics());

  statsByColumn = ParquetTableMetadataUtils.addImplicitColumnsStatistics(
      statsByColumn,
      schemaPathsInExpr,
      Collections.emptyList(),
      options,
      targetRowGroup.getPath(),
      true);

  return matches(
      expr,
      statsByColumn,
      targetRowGroup.getSchema(),
      TableStatisticsKind.ROW_COUNT.getValue(targetRowGroup),
      fragmentContext,
      fragmentContext.getFunctionRegistry(),
      new HashSet<>(schemaPathsInExpr));
}
Use of org.apache.drill.metastore.metadata.NonInterestingColumnsMetadata in the Apache Drill project.
Class MetastoreFileTableMetadataProvider, method getNonInterestingColumnsMetadata.
/**
 * Returns the non-interesting columns metadata, building and caching it on the first call.
 *
 * <p>A column receives a placeholder statistics entry ({@code Statistic.NO_COLUMN_STATS} for
 * {@code ColumnStatisticsKind.NULLS_COUNT}) when it is not among the interesting columns, or
 * when its column metadata reports it as an array.
 *
 * <p>{@code throwIfChanged()} is invoked on every call before the cached value is consulted.
 *
 * @return cached or newly built non-interesting columns metadata
 */
@Override
public NonInterestingColumnsMetadata getNonInterestingColumnsMetadata() {
  throwIfChanged();
  if (nonInterestingColumnsMetadata != null) {
    // Already computed by an earlier call
    return nonInterestingColumnsMetadata;
  }

  TupleMetadata tableSchema = getTableMetadata().getSchema();
  List<StatisticsHolder<?>> noStatsHolders = Collections.singletonList(
      new StatisticsHolder<>(Statistic.NO_COLUMN_STATS, ColumnStatisticsKind.NULLS_COUNT));
  List<SchemaPath> allColumns = SchemaUtil.getSchemaPaths(tableSchema);
  List<SchemaPath> interestingColumns = getInterestingColumns(allColumns);

  // populates statistics for non-interesting columns and columns for which statistics wasn't collected
  Map<SchemaPath, ColumnStatistics<?>> statsByColumn = allColumns.stream()
      // skip only columns that are interesting and are not arrays
      .filter(column -> !(interestingColumns.contains(column)
          && !SchemaPathUtils.getColumnMetadata(column, tableSchema).isArray()))
      .collect(Collectors.toMap(
          Function.identity(),
          column -> new ColumnStatistics<>(noStatsHolders, SchemaPathUtils.getColumnMetadata(column, tableSchema).type())));

  nonInterestingColumnsMetadata = new NonInterestingColumnsMetadata(statsByColumn);
  return nonInterestingColumnsMetadata;
}
Use of org.apache.drill.metastore.metadata.NonInterestingColumnsMetadata in the Apache Drill project.
Class ParquetTableMetadataUtils, method getNonInterestingColumnsMeta.
/**
 * Builds the metadata of non-interesting columns from the given parquet table metadata.
 *
 * <p>Only {@code Metadata_V4.ParquetTableMetadata_v4} instances with a non-null column type info
 * map contribute entries. Each column type whose {@code isInteresting} flag is {@code false}
 * gets a statistics entry holding {@code Statistic.NO_COLUMN_STATS} for
 * {@code ColumnStatisticsKind.NULLS_COUNT}, typed by the minor type derived from its primitive
 * and original parquet types. In every other case the returned metadata wraps an empty map.
 *
 * @param parquetTableMetadata the source of column metadata for non-interesting columns' statistics
 * @return non-interesting columns metadata, possibly backed by an empty statistics map
 */
public static NonInterestingColumnsMetadata getNonInterestingColumnsMeta(MetadataBase.ParquetTableMetadataBase parquetTableMetadata) {
  Map<SchemaPath, ColumnStatistics<?>> statsByColumn = new HashMap<>();

  // Metadata of other kinds yields no non-interesting column entries
  if (!(parquetTableMetadata instanceof Metadata_V4.ParquetTableMetadata_v4)) {
    return new NonInterestingColumnsMetadata(statsByColumn);
  }

  Metadata_V4.ParquetTableMetadata_v4 v4Metadata = (Metadata_V4.ParquetTableMetadata_v4) parquetTableMetadata;
  Map<Metadata_V4.ColumnTypeMetadata_v4.Key, Metadata_V4.ColumnTypeMetadata_v4> typeInfoByKey = v4Metadata.getColumnTypeInfoMap();

  if (typeInfoByKey != null) {
    for (Metadata_V4.ColumnTypeMetadata_v4 columnType : typeInfoByKey.values()) {
      if (columnType.isInteresting) {
        continue;
      }
      SchemaPath columnPath = SchemaPath.getCompoundPath(columnType.name);
      // Each column gets its own mutable holder list
      List<StatisticsHolder<?>> holders = new ArrayList<>();
      holders.add(new StatisticsHolder<>(Statistic.NO_COLUMN_STATS, ColumnStatisticsKind.NULLS_COUNT));
      TypeProtos.MinorType minorType = ParquetReaderUtility.getMinorType(columnType.primitiveType, columnType.originalType);
      statsByColumn.put(columnPath, new ColumnStatistics<>(holders, minorType));
    }
  }

  return new NonInterestingColumnsMetadata(statsByColumn);
}
Aggregations