Usage of org.apache.drill.metastore.metadata.RowGroupMetadata in the Apache Drill project.
Class AbstractParquetGroupScan, method pruneRowGroupsForFiles.
/**
 * Selects, from the row groups metadata of this scan, the entries that belong to the given files.
 * Only the keys (file paths) of the supplied map are consulted.
 *
 * @param filteredFileMetadata map whose keys are the paths of the files to keep
 * @return multimap from file path to the row groups metadata of that file; paths for which
 *         no row groups metadata is found are not present in the result
 */
protected Multimap<Path, RowGroupMetadata> pruneRowGroupsForFiles(Map<Path, FileMetadata> filteredFileMetadata) {
  Multimap<Path, RowGroupMetadata> result = LinkedListMultimap.create();
  filteredFileMetadata.keySet().forEach(filePath -> {
    // Look up the row groups recorded for this file; skip the file when there are none.
    Collection<RowGroupMetadata> fileRowGroups = getRowGroupsMetadata().get(filePath);
    if (CollectionUtils.isNotEmpty(fileRowGroups)) {
      result.putAll(filePath, fileRowGroups);
    }
  });
  return result;
}
Usage of org.apache.drill.metastore.metadata.RowGroupMetadata in the Apache Drill project.
Class AbstractParquetGroupScan, method getRowGroupInfos.
/**
 * Returns the {@link RowGroupInfo} list for this scan, building it on first use and caching it
 * in {@code rowGroupInfos}.
 * <p>
 * For every row group metadata entry a {@link RowGroupInfo} is created from its path, START and
 * LENGTH statistics, row group index and row count. Its endpoint byte map receives, for every
 * affinity host that matches the address of a known drillbit, the host affinity multiplied by
 * the row group LENGTH statistic.
 * <p>
 * The list is assembled locally and stored in the cache field only after it has been fully
 * built, so a failure while building never leaves a partially populated list cached.
 *
 * @return cached list of row group infos
 */
private List<RowGroupInfo> getRowGroupInfos() {
  if (rowGroupInfos != null) {
    return rowGroupInfos;
  }

  // Index the drillbit endpoints by address to match them against host affinity entries.
  Map<String, CoordinationProtos.DrillbitEndpoint> hostEndpointMap = new HashMap<>();
  for (CoordinationProtos.DrillbitEndpoint endpoint : getDrillbits()) {
    hostEndpointMap.put(endpoint.getAddress(), endpoint);
  }

  // Build into a local list: publishing to the field up front would cache a truncated
  // list if any of the metadata lookups below threw an exception.
  List<RowGroupInfo> infos = new ArrayList<>();
  for (RowGroupMetadata rowGroupMetadata : getRowGroupsMetadata().values()) {
    RowGroupInfo rowGroupInfo = new RowGroupInfo(
        rowGroupMetadata.getPath(),
        rowGroupMetadata.getStatistic(() -> ExactStatisticsConstants.START),
        rowGroupMetadata.getStatistic(() -> ExactStatisticsConstants.LENGTH),
        rowGroupMetadata.getRowGroupIndex(),
        TableStatisticsKind.ROW_COUNT.getValue(rowGroupMetadata));
    rowGroupInfo.setNumRecordsToRead(rowGroupInfo.getRowCount());

    EndpointByteMap endpointByteMap = new EndpointByteMapImpl();
    for (String host : rowGroupMetadata.getHostAffinity().keySet()) {
      // Single lookup instead of containsKey + get; hosts without a drillbit are ignored.
      CoordinationProtos.DrillbitEndpoint endpoint = hostEndpointMap.get(host);
      if (endpoint != null) {
        endpointByteMap.add(endpoint, (long) (rowGroupMetadata.getHostAffinity().get(host) * (long) rowGroupMetadata.getStatistic(() -> ExactStatisticsConstants.LENGTH)));
      }
    }
    rowGroupInfo.setEndpointByteMap(endpointByteMap);
    infos.add(rowGroupInfo);
  }

  rowGroupInfos = infos;
  return rowGroupInfos;
}
Usage of org.apache.drill.metastore.metadata.RowGroupMetadata in the Apache Drill project.
Class ParquetTableMetadataUtils, method getRowGroupsMetadata.
/**
 * Converts the parquet row groups metadata held by the specified tableMetadata into
 * {@link RowGroupMetadata} instances, grouped by the path of the file they belong to
 * (with scheme and authority removed).
 * Row groups are indexed by their position among the non-empty row groups of their file,
 * starting from 0. An empty / fake row group gets index '-1' and must be the only
 * row group of its file.
 *
 * @param tableMetadata the source of row groups to be converted
 * @return multimap from file path to the {@link RowGroupMetadata} of that file
 * @throws IllegalStateException if a file with an empty / fake row group
 *         contains more than one row group
 */
public static Multimap<Path, RowGroupMetadata> getRowGroupsMetadata(MetadataBase.ParquetTableMetadataBase tableMetadata) {
  Multimap<Path, RowGroupMetadata> result = LinkedListMultimap.create();
  for (MetadataBase.ParquetFileMetadata fileMetadata : tableMetadata.getFiles()) {
    // Position counter for the real (non-empty) row groups of the current file.
    int nextIndex = 0;
    for (MetadataBase.RowGroupMetadata rowGroup : fileMetadata.getRowGroups()) {
      boolean emptyRowGroup = rowGroup.isEmpty();
      if (emptyRowGroup) {
        Preconditions.checkState(fileMetadata.getRowGroups().size() == 1, "Only one empty / fake row group is allowed per file");
      }
      // Empty row groups do not consume a position.
      int assignedIndex = emptyRowGroup ? -1 : nextIndex++;
      Path location = Path.getPathWithoutSchemeAndAuthority(fileMetadata.getPath());
      result.put(location, getRowGroupMetadata(tableMetadata, rowGroup, assignedIndex, location));
    }
  }
  return result;
}
Usage of org.apache.drill.metastore.metadata.RowGroupMetadata in the Apache Drill project.
Class ParquetTableMetadataUtils, method getFileMetadata.
/**
 * Builds a {@link FileMetadata} instance by merging the specified {@link RowGroupMetadata} collection.
 * Table info, path, schema and the set of columns are taken from the first row group in
 * iteration order; the row count and the column statistics are merged across all row groups.
 *
 * @param rowGroups collection of {@link RowGroupMetadata} to be merged
 * @return {@link FileMetadata} instance, or {@code null} if {@code rowGroups} is empty
 */
public static FileMetadata getFileMetadata(Collection<RowGroupMetadata> rowGroups) {
  if (rowGroups.isEmpty()) {
    return null;
  }

  // File-level statistics: only the merged row count is recorded here.
  List<StatisticsHolder<?>> mergedStatistics = new ArrayList<>();
  mergedStatistics.add(new StatisticsHolder<>(TableStatisticsKind.ROW_COUNT.mergeStatistics(rowGroups), TableStatisticsKind.ROW_COUNT));

  // The first row group supplies the attributes that are not merged.
  RowGroupMetadata firstRowGroup = rowGroups.iterator().next();
  TupleMetadata fileSchema = firstRowGroup.getSchema();
  Set<SchemaPath> columnPaths = firstRowGroup.getColumnsStatistics().keySet();

  MetadataInfo fileMetadataInfo = MetadataInfo.builder()
      .type(MetadataType.FILE)
      .build();

  return FileMetadata.builder()
      .tableInfo(firstRowGroup.getTableInfo())
      .metadataInfo(fileMetadataInfo)
      .path(firstRowGroup.getPath())
      .schema(fileSchema)
      .columnsStatistics(TableMetadataUtils.mergeColumnsStatistics(rowGroups, columnPaths, PARQUET_COLUMN_STATISTICS))
      .metadataStatistics(mergedStatistics)
      .build();
}
Aggregations