Use of org.apache.drill.exec.store.dfs.FileSelection in the Apache Drill project.
Class ParquetGroupScan, method initFromMetadataCache.
/**
 * Create and return a new file selection based on reading the metadata cache file.
 *
 * This function also initializes a few of ParquetGroupScan's fields as appropriate:
 * {@code parquetTableMetadata}, {@code fileSet} and {@code selectionRoot}.
 *
 * @param selection initial file selection
 * @param metaFilePath metadata cache file path
 * @return file selection read from cache, marked as fully expanded and carrying the
 *         metadata context of the initial selection
 *
 * @throws IOException if an I/O error occurs while reading the metadata cache or the file statuses
 * @throws UserException when the updated selection is empty, this happens if the user selects an empty folder.
 */
private FileSelection initFromMetadataCache(FileSelection selection, Path metaFilePath) throws IOException {
  // Get (and set the internal field) the metadata for the directory by reading the metadata file.
  // parquetTableMetadata contains the metadata for all files in the selection root folder, but we
  // need to make sure we only select the files that are part of selection (by setting fileSet
  // appropriately).
  this.parquetTableMetadata = Metadata.readBlockMeta(fs, metaFilePath.toString(), selection.getMetaContext(), formatConfig);
  if (formatConfig.autoCorrectCorruptDates) {
    ParquetReaderUtility.correctDatesInMetadataCache(this.parquetTableMetadata);
  }

  final List<FileStatus> fileStatuses = selection.getStatuses(fs);

  if (fileSet == null) {
    fileSet = Sets.newHashSet();
  }

  if (fileStatuses.isEmpty()) {
    // Nothing was selected at all. Report it the same way as an empty expansion below instead of
    // failing with an IndexOutOfBoundsException on fileStatuses.get(0).
    throw UserException.validationError().message("The table you tried to query is empty").build(logger);
  }

  final Path first = fileStatuses.get(0).getPath();
  if (fileStatuses.size() == 1 && selection.getSelectionRoot().equals(first.toString())) {
    // we are selecting all files from selection root. Expand the file list from the cache
    for (Metadata.ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
      fileSet.add(file.getPath());
    }
  } else if (selection.isExpandedPartial() && !selection.hadWildcard() && cacheFileRoot != null) {
    if (selection.wasAllPartitionsPruned()) {
      // if all partitions were previously pruned, we only need to read 1 file (for the schema).
      // A cache that lists no files contributes nothing; the emptiness check below then reports it.
      if (!this.parquetTableMetadata.getFiles().isEmpty()) {
        fileSet.add(this.parquetTableMetadata.getFiles().get(0).getPath());
      }
    } else {
      // second phase of partition pruning will apply on the files and modify the file selection appropriately.
      for (Metadata.ParquetFileMetadata file : this.parquetTableMetadata.getFiles()) {
        fileSet.add(file.getPath());
      }
    }
  } else {
    // we need to expand the files from fileStatuses
    for (FileStatus status : fileStatuses) {
      if (status.isDirectory()) {
        //TODO [DRILL-4496] read the metadata cache files in parallel
        final Path metaPath = new Path(status.getPath(), Metadata.METADATA_FILENAME);
        final Metadata.ParquetTableMetadataBase metadata = Metadata.readBlockMeta(fs, metaPath.toString(), selection.getMetaContext(), formatConfig);
        for (Metadata.ParquetFileMetadata file : metadata.getFiles()) {
          fileSet.add(file.getPath());
        }
      } else {
        final Path path = Path.getPathWithoutSchemeAndAuthority(status.getPath());
        fileSet.add(path.toString());
      }
    }
  }

  if (fileSet.isEmpty()) {
    // no files were found, most likely we tried to query some empty sub folders
    throw UserException.validationError().message("The table you tried to query is empty").build(logger);
  }

  List<String> fileNames = Lists.newArrayList(fileSet);

  // when creating the file selection, set the selection root without the URI prefix
  // The reason is that the file names above have been created in the form
  // /a/b/c.parquet and the format of the selection root must match that of the file names
  // otherwise downstream operations such as partition pruning can break.
  final Path metaRootPath = Path.getPathWithoutSchemeAndAuthority(new Path(selection.getSelectionRoot()));
  this.selectionRoot = metaRootPath.toString();

  // Use the FileSelection constructor directly here instead of the FileSelection.create() method
  // because create() changes the root to include the scheme and authority; In future, if create()
  // is the preferred way to instantiate a file selection, we may need to do something different...
  // WARNING: file statuses and file names are inconsistent
  // The statuses fetched above are reused rather than asking the selection for them a second time.
  FileSelection newSelection = new FileSelection(fileStatuses, fileNames, metaRootPath.toString(), cacheFileRoot, selection.wasAllPartitionsPruned());
  newSelection.setExpandedFully();
  newSelection.setMetaContext(selection.getMetaContext());
  return newSelection;
}
Use of org.apache.drill.exec.store.dfs.FileSelection in the Apache Drill project.
Class FileSystemPartitionDescriptor, method createNewTableScanFromSelection.
/**
 * Builds a replacement table scan whose file selection is restricted to {@code newFiles}.
 *
 * @param oldScan the scan being replaced; supplies the cluster, schema and row type
 * @param newFiles files the new selection should contain
 * @param cacheFileRoot cache file root handed to the new file selection
 * @param wasAllPartitionsPruned flag handed to the new file selection
 * @param metaContext metadata context attached to the new file selection
 * @return a {@code DirPrunedEnumerableTableScan} over the new selection
 */
private TableScan createNewTableScanFromSelection(EnumerableTableScan oldScan, List<String> newFiles, String cacheFileRoot, boolean wasAllPartitionsPruned, MetadataContext metaContext) {
  final RelOptTableImpl previousOptTable = (RelOptTableImpl) oldScan.getTable();
  final FormatSelection previousFormatSelection = (FormatSelection) table.getSelection();

  // The new selection keeps the base table location and the directory status of the previous one.
  final FileSelection prunedSelection = new FileSelection(
      null,
      newFiles,
      getBaseTableLocation(),
      cacheFileRoot,
      wasAllPartitionsPruned,
      previousFormatSelection.getSelection().getDirStatus());
  prunedSelection.setMetaContext(metaContext);

  // Wrap the pruned selection in the same format and re-create the table around it.
  final FormatSelection prunedFormatSelection = new FormatSelection(previousFormatSelection.getFormat(), prunedSelection);
  final DynamicDrillTable dynamicTable = new DynamicDrillTable(
      table.getPlugin(),
      table.getStorageEngineName(),
      table.getUserName(),
      prunedFormatSelection);
  final DrillTranslatableTable translatableTable = new DrillTranslatableTable(dynamicTable);
  final RelOptTableImpl prunedOptTable = RelOptTableImpl.create(
      previousOptTable.getRelOptSchema(),
      previousOptTable.getRowType(),
      translatableTable);

  // return an EnumerableTableScan with fileSelection being part of digest of TableScan node.
  return DirPrunedEnumerableTableScan.create(oldScan.getCluster(), prunedOptTable, prunedSelection.toString());
}
Use of org.apache.drill.exec.store.dfs.FileSelection in the Apache Drill project.
Class FileSystemPartitionDescriptor, method getFileLocationsAndStatus.
/**
 * Collects the file locations of the current scan together with a flag telling whether the
 * underlying selection is only partially expanded.
 *
 * @return a pair of (file locations, isExpandedPartial); the locations are {@code null} and the
 *         flag is {@code false} when the scan is neither a {@code DrillScanRel} nor an
 *         {@code EnumerableTableScan}
 */
protected Pair<Collection<String>, Boolean> getFileLocationsAndStatus() {
  final Collection<String> locations;
  final boolean expandedPartial;

  final boolean isDrillScan = scanRel instanceof DrillScanRel;
  if (isDrillScan && ((DrillScanRel) scanRel).getGroupScan().hasFiles()) {
    // If a particular GroupScan provides files, get the list of files from there rather than
    // DrillTable because GroupScan would have the updated version of the selection
    locations = ((DrillScanRel) scanRel).getGroupScan().getFiles();
    expandedPartial = false;
  } else if (isDrillScan || scanRel instanceof EnumerableTableScan) {
    // Otherwise read both the files and the expansion state from the table's selection.
    final FileSelection tableSelection = ((FormatSelection) table.getSelection()).getSelection();
    locations = tableSelection.getFiles();
    expandedPartial = tableSelection.isExpandedPartial();
  } else {
    // Any other kind of scan: no locations, not partially expanded.
    locations = null;
    expandedPartial = false;
  }

  return Pair.of(locations, expandedPartial);
}
Aggregations