Usage of org.apache.drill.metastore.metadata.FileMetadata in the Apache Drill project:
class AbstractParquetGroupScan, method applyLimit.
// filter push down methods block end
// limit push down methods start
/**
 * Applies a row-count limit to this group scan by pruning row groups that are not
 * needed to produce {@code maxRecords} rows.
 *
 * @param maxRecords maximum number of records required; values below 1 are treated
 *                   as 1 so that at least one row group is scanned
 * @return a new group scan restricted to the qualifying row groups and their files,
 *         or {@code null} if the limit cannot be pushed down (statistics missing,
 *         table already small enough, or no row-group reduction achieved)
 */
@Override
public GroupScan applyLimit(int maxRecords) {
  // Make sure at least 1 row (and therefore 1 row group) is requested.
  maxRecords = Math.max(maxRecords, 1);
  if (getTableMetadata() != null) {
    long tableRowCount = TableStatisticsKind.ROW_COUNT.getValue(getTableMetadata());
    // NO_COLUMN_STATS means the row count is unknown, so pruning would be unsafe.
    if (tableRowCount == Statistic.NO_COLUMN_STATS || tableRowCount <= maxRecords) {
      logger.debug("limit push down does not apply, since total number of rows [{}] is less or equal to the required [{}].", tableRowCount, maxRecords);
      return null;
    }
  }
  List<RowGroupMetadata> qualifiedRowGroups = limitMetadata(getRowGroupsMetadata().values(), maxRecords);
  if (qualifiedRowGroups == null || getRowGroupsMetadata().size() == qualifiedRowGroups.size()) {
    logger.debug("limit push down does not apply, since number of row groups was not reduced.");
    return null;
  }
  Map<Path, FileMetadata> filesMetadata = getFilesMetadata();
  // Several qualifying row groups may belong to the same file, which yields duplicate
  // map keys. Supply a merge function keeping the first occurrence; without it,
  // Collectors.toMap throws IllegalStateException on the duplicate key.
  Map<Path, FileMetadata> qualifiedFiles = qualifiedRowGroups.stream()
      .map(rowGroup -> filesMetadata.get(rowGroup.getPath()))
      .filter(Objects::nonNull)
      .collect(Collectors.toMap(FileMetadata::getPath, Function.identity(), (first, second) -> first));
  Multimap<Path, RowGroupMetadata> prunedRowGroups = LinkedListMultimap.create();
  for (RowGroupMetadata qualifiedRowGroup : qualifiedRowGroups) {
    prunedRowGroups.put(qualifiedRowGroup.getPath(), qualifiedRowGroup);
  }
  return getFilterer()
      .rowGroups(prunedRowGroups)
      .table(tableMetadata)
      .partitions(partitions)
      .segments(segments)
      .files(qualifiedFiles)
      .nonInterestingColumns(nonInterestingColumnsMetadata)
      .matching(matchAllMetadata)
      .build();
}
Usage of org.apache.drill.metastore.metadata.FileMetadata in the Apache Drill project:
class ParquetTableMetadataUtils, method getPartitionMetadata.
/**
 * Returns {@link PartitionMetadata} instance received by merging specified {@link FileMetadata} list.
 *
 * @param partitionColumn partition column
 * @param files list of files to be merged; must be non-empty
 * @return {@link PartitionMetadata} instance
 * @throws IllegalArgumentException if {@code files} is empty
 */
public static PartitionMetadata getPartitionMetadata(SchemaPath partitionColumn, List<FileMetadata> files) {
  // Fail fast with a descriptive message instead of a bare NoSuchElementException
  // from files.iterator().next() further below.
  if (files.isEmpty()) {
    throw new IllegalArgumentException("Cannot create partition metadata from an empty list of files");
  }
  Set<Path> locations = new HashSet<>();
  Set<SchemaPath> columns = new HashSet<>();
  // Collect the union of all column paths and all file locations across the partition.
  for (FileMetadata file : files) {
    columns.addAll(file.getColumnsStatistics().keySet());
    locations.add(file.getPath());
  }
  // Table info and schema are taken from the first file; assumes all files in the
  // partition share the same table and schema — TODO confirm with callers.
  FileMetadata fileMetadata = files.get(0);
  MetadataInfo metadataInfo = MetadataInfo.builder().type(MetadataType.PARTITION).build();
  return PartitionMetadata.builder()
      .tableInfo(fileMetadata.getTableInfo())
      .metadataInfo(metadataInfo)
      .column(partitionColumn)
      .schema(fileMetadata.getSchema())
      .columnsStatistics(TableMetadataUtils.mergeColumnsStatistics(files, columns, PARQUET_COLUMN_STATISTICS))
      .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(TableStatisticsKind.ROW_COUNT.mergeStatistics(files), TableStatisticsKind.ROW_COUNT)))
      .partitionValues(Collections.emptyList())
      .locations(locations)
      .build();
}
Usage of org.apache.drill.metastore.metadata.FileMetadata in the Apache Drill project:
class BaseParquetMetadataProvider, method getSegmentsMetadataMap.
/**
 * Lazily builds and caches the map of segment metadata keyed by segment location.
 * Segments are derived level by level from the file metadata: the deepest level is
 * computed first, then each parent level is aggregated from the level below it.
 * Returns an empty map (without caching) when there are no entries or metadata
 * collection is disabled.
 *
 * <p>NOTE(review): not thread-safe — concurrent first calls may both populate
 * {@code segments}; confirm single-threaded usage.
 *
 * @return map of segment location to its {@link SegmentMetadata}
 */
@SuppressWarnings("unused")
@Override
public Map<Path, SegmentMetadata> getSegmentsMetadataMap() {
  if (segments == null) {
    if (entries.isEmpty() || !collectMetadata) {
      return Collections.emptyMap();
    }
    segments = new LinkedHashMap<>();
    // Fetch the file metadata once; the original computed it twice.
    Map<Path, FileMetadata> filesMetadata = getFilesMetadataMap();
    // Use an arbitrary file to determine how many directory levels lie between
    // the table root and the files; assumes a uniform directory depth — TODO confirm.
    Path fileLocation = filesMetadata.values().iterator().next().getPath();
    int levelsCount = fileLocation.depth() - tableLocation.depth();
    int segmentsIndex = levelsCount - 1;
    // Deepest segment level is aggregated directly from the files.
    Map<Path, SegmentMetadata> segmentMetadata = getSegmentsForMetadata(filesMetadata,
        SchemaPath.getSimplePath(MetadataInfo.DEFAULT_COLUMN_PREFIX + segmentsIndex));
    segments.putAll(segmentMetadata);
    // Each shallower level is aggregated from the segment level beneath it.
    for (int i = segmentsIndex - 1; i >= 0; i--) {
      String segmentColumn = MetadataInfo.DEFAULT_COLUMN_PREFIX + i;
      segmentMetadata = getMetadataForSegments(segmentMetadata, SchemaPath.getSimplePath(segmentColumn));
      segments.putAll(segmentMetadata);
    }
  }
  return segments;
}
Usage of org.apache.drill.metastore.metadata.FileMetadata in the Apache Drill project:
class BasicTablesTransformer, method all.
/**
 * Sorts the given metadata units into typed collections (tables, segments, files,
 * row groups, partitions) and packs them into a {@link MetadataHolder}.
 * Units with an unknown or unsupported metadata type are silently skipped.
 *
 * @param units generic metadata units to classify
 * @return holder containing the typed metadata lists
 */
public static MetadataHolder all(List<TableMetadataUnit> units) {
  List<BaseTableMetadata> tableList = new ArrayList<>();
  List<SegmentMetadata> segmentList = new ArrayList<>();
  List<FileMetadata> fileList = new ArrayList<>();
  List<RowGroupMetadata> rowGroupList = new ArrayList<>();
  List<PartitionMetadata> partitionList = new ArrayList<>();
  for (TableMetadataUnit unit : units) {
    MetadataType type = MetadataType.fromValue(unit.metadataType());
    if (type == null) {
      // Unknown type string — skip the unit.
      continue;
    }
    if (type == MetadataType.TABLE) {
      tableList.add(BaseTableMetadata.builder().metadataUnit(unit).build());
    } else if (type == MetadataType.SEGMENT) {
      segmentList.add(SegmentMetadata.builder().metadataUnit(unit).build());
    } else if (type == MetadataType.FILE) {
      fileList.add(FileMetadata.builder().metadataUnit(unit).build());
    } else if (type == MetadataType.ROW_GROUP) {
      rowGroupList.add(RowGroupMetadata.builder().metadataUnit(unit).build());
    } else if (type == MetadataType.PARTITION) {
      partitionList.add(PartitionMetadata.builder().metadataUnit(unit).build());
    }
    // Any other recognized type is intentionally ignored.
  }
  return new MetadataHolder(tableList, segmentList, fileList, rowGroupList, partitionList);
}
Usage of org.apache.drill.metastore.metadata.FileMetadata in the Apache Drill project:
class MetadataControllerBatch, method getFileMetadata.
/**
 * Builds a {@link FileMetadata} instance from the current row of the given reader,
 * attaching the supplied file-level and per-column statistics.
 *
 * @param reader tuple reader positioned on the row describing the file
 * @param metadataStatistics file-level statistics to attach
 * @param columnStatistics per-column statistics to attach
 * @param nestingLevel directory nesting depth of the file relative to the table root;
 *        NOTE(review): a value of 0 would make the stream limit below negative and
 *        throw — presumably callers always pass >= 1, confirm.
 * @return the assembled file metadata
 */
private FileMetadata getFileMetadata(TupleReader reader, List<StatisticsHolder<?>> metadataStatistics, Map<SchemaPath, ColumnStatistics<?>> columnStatistics, int nestingLevel) {
  List<String> segmentColumns = popConfig.getContext().segmentColumns();
  // Segment key comes from the first configured segment column; falls back to the
  // default key when no segment columns are configured.
  String segmentKey = segmentColumns.size() > 0 ? reader.column(segmentColumns.iterator().next()).scalar().getString() : MetadataInfo.DEFAULT_SEGMENT_KEY;
  // Partition values are the segment-column values for the enclosing directory levels
  // (one fewer than the nesting level, since the last level is the file itself).
  List<String> partitionValues = segmentColumns.stream().limit(nestingLevel - 1).map(columnName -> reader.column(columnName).scalar().getString()).collect(Collectors.toList());
  Path path = new Path(reader.column(MetastoreAnalyzeConstants.LOCATION_FIELD).scalar().getString());
  String metadataIdentifier = MetadataIdentifierUtils.getFileMetadataIdentifier(partitionValues, path);
  // An empty identifier is stored as null rather than "".
  MetadataInfo metadataInfo = MetadataInfo.builder().type(MetadataType.FILE).key(segmentKey).identifier(StringUtils.defaultIfEmpty(metadataIdentifier, null)).build();
  return FileMetadata.builder().tableInfo(tableInfo).metadataInfo(metadataInfo).columnsStatistics(columnStatistics).metadataStatistics(metadataStatistics).path(path).lastModifiedTime(Long.parseLong(reader.column(columnNamesOptions.lastModifiedTime()).scalar().getString())).schema(TupleMetadata.of(reader.column(MetastoreAnalyzeConstants.SCHEMA_FIELD).scalar().getString())).build();
}
Aggregations