Use of org.apache.drill.exec.store.dfs.ReadEntryWithPath in project drill by apache.
The class ParquetGroupScan, method modifyFileSelection.
@Override
public void modifyFileSelection(FileSelection selection) {
  // Rebuild the read entries and the file set from the new selection.
  entries.clear();
  fileSet = Sets.newHashSet();
  for (String fileName : selection.getFiles()) {
    entries.add(new ReadEntryWithPath(fileName));
    fileSet.add(fileName);
  }
  // Keep only the row groups that belong to the selected files.
  List<RowGroupInfo> newRowGroupList = Lists.newArrayList();
  for (RowGroupInfo rowGroupInfo : rowGroupInfos) {
    if (fileSet.contains(rowGroupInfo.getPath())) {
      newRowGroupList.add(rowGroupInfo);
    }
  }
  this.rowGroupInfos = newRowGroupList;
}
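For context, a minimal standalone sketch of the same pruning pattern: rebuild the set of selected file paths, then keep only the row groups whose path is in that set. RowGroup, prune, and the sample paths are hypothetical stand-ins for illustration, not Drill classes.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Hypothetical stand-in for Drill's RowGroupInfo (path + row-group index).
public class SelectionPruningSketch {

  record RowGroup(String path, int index) {}

  static List<RowGroup> prune(List<RowGroup> rowGroups, List<String> selectedFiles) {
    Set<String> fileSet = new HashSet<>(selectedFiles); // mirrors fileSet = Sets.newHashSet()
    List<RowGroup> kept = new ArrayList<>();
    for (RowGroup rg : rowGroups) {
      if (fileSet.contains(rg.path())) {                // same membership test as above
        kept.add(rg);
      }
    }
    return kept;
  }

  public static void main(String[] args) {
    List<RowGroup> all = List.of(
        new RowGroup("/data/a.parquet", 0),
        new RowGroup("/data/a.parquet", 1),
        new RowGroup("/data/b.parquet", 0));
    // After the selection is narrowed to a.parquet, only its row groups survive.
    System.out.println(prune(all, List.of("/data/a.parquet")));
  }
}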
Use of org.apache.drill.exec.store.dfs.ReadEntryWithPath in project drill by axbaretto.
The class ParquetGroupScan, method modifyFileSelection; the implementation is identical to the apache entry above.
Use of org.apache.drill.exec.store.dfs.ReadEntryWithPath in project drill by apache.
The class HiveParquetTableMetadataProvider, method initInternal.
@Override
protected void initInternal() throws IOException {
  Map<FileStatus, FileSystem> fileStatusConfMap = new LinkedHashMap<>();
  for (ReadEntryWithPath entry : entries) {
    Path path = entry.getPath();
    // Push Hive projections and filters into the job configuration for the parent directory.
    Configuration conf = new ProjectionPusher().pushProjectionsAndFilters(
        new JobConf(hiveStoragePlugin.getHiveConf()), path.getParent());
    FileSystem fs = path.getFileSystem(conf);
    fileStatusConfMap.put(fs.getFileStatus(Path.getPathWithoutSchemeAndAuthority(path)), fs);
  }
  parquetTableMetadata = Metadata.getParquetTableMetadata(fileStatusConfMap, readerConfig);
}
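A reduced sketch of the status-to-filesystem map built here, using only Hadoop's public Path and FileSystem APIs (hadoop-common on the classpath) and omitting the Hive-specific ProjectionPusher step; the class name and sample path are assumptions for illustration.

import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Builds FileStatus -> FileSystem in listing order, as initInternal() does above.
public class StatusMapSketch {

  static Map<FileStatus, FileSystem> statusMap(List<Path> paths, Configuration conf) throws IOException {
    // LinkedHashMap keeps the entries in the order the read entries were listed.
    Map<FileStatus, FileSystem> map = new LinkedHashMap<>();
    for (Path path : paths) {
      FileSystem fs = path.getFileSystem(conf);
      // Drop scheme/authority (e.g. "hdfs://namenode") before asking for the status.
      map.put(fs.getFileStatus(Path.getPathWithoutSchemeAndAuthority(path)), fs);
    }
    return map;
  }

  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    System.out.println(statusMap(List.of(new Path("file:///tmp")), conf).keySet());
  }
}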
Use of org.apache.drill.exec.store.dfs.ReadEntryWithPath in project drill by apache.
The class ParquetTableMetadataProviderImpl, method initInternal.
@Override
protected void initInternal() throws IOException {
  try (FileSystem processUserFileSystem = ImpersonationUtil.createFileSystem(
      ImpersonationUtil.getProcessUserName(), fs.getConf())) {
    // Depending on the metadata version this may represent more than one metadata file path.
    List<Path> metaPaths = new ArrayList<>();
    if (entries.size() == 1 && parquetTableMetadata == null) {
      Path p = Path.getPathWithoutSchemeAndAuthority(entries.get(0).getPath());
      if (fs.isDirectory(p)) {
        // Using the metadata file makes sense when querying a directory; otherwise,
        // when querying a single file, we can look up the metadata directly from the file.
        metaPaths = populateMetaPaths(p, fs);
      }
      if (!metaContext.isMetadataCacheCorrupted() && !metaPaths.isEmpty()) {
        parquetTableMetadata = Metadata.readBlockMeta(processUserFileSystem, metaPaths, metaContext, readerConfig);
        if (parquetTableMetadata != null) {
          usedMetadataCache = true;
        }
      }
      if (!usedMetadataCache) {
        parquetTableMetadata = Metadata.getParquetTableMetadata(processUserFileSystem, p, readerConfig);
      }
    } else {
      Path p = Path.getPathWithoutSchemeAndAuthority(selectionRoot);
      metaPaths = populateMetaPaths(p, fs);
      if (!metaContext.isMetadataCacheCorrupted() && fs.isDirectory(selectionRoot) && !metaPaths.isEmpty()) {
        if (parquetTableMetadata == null) {
          parquetTableMetadata = Metadata.readBlockMeta(processUserFileSystem, metaPaths, metaContext, readerConfig);
        }
        if (parquetTableMetadata != null) {
          usedMetadataCache = true;
          if (fileSet != null) {
            parquetTableMetadata = removeUnneededRowGroups(parquetTableMetadata);
          }
        }
      }
      if (!usedMetadataCache) {
        final List<FileStatus> fileStatuses = new ArrayList<>();
        for (ReadEntryWithPath entry : entries) {
          fileStatuses.addAll(DrillFileSystemUtil.listFiles(fs, Path.getPathWithoutSchemeAndAuthority(entry.getPath()), true));
        }
        Map<FileStatus, FileSystem> statusMap = fileStatuses.stream()
            .collect(Collectors.toMap(Function.identity(), s -> processUserFileSystem, (oldFs, newFs) -> newFs, LinkedHashMap::new));
        parquetTableMetadata = Metadata.getParquetTableMetadata(statusMap, readerConfig);
      }
    }
  }
}
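The four-argument Collectors.toMap call near the end is the least obvious piece; a self-contained sketch of that idiom follows, with plain strings standing in for FileStatus and FileSystem.

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

// Minimal illustration of the four-argument Collectors.toMap used above:
// identity keys, a constant value, "last one wins" on duplicate keys, and a
// LinkedHashMap supplier so iteration order matches the file listing order.
public class ToMapSketch {
  public static void main(String[] args) {
    List<String> fileStatuses = List.of("a.parquet", "b.parquet", "a.parquet");
    String processUserFs = "processUserFileSystem"; // stand-in for the shared FileSystem instance
    Map<String, String> statusMap = fileStatuses.stream()
        .collect(Collectors.toMap(
            Function.identity(),          // key: the status itself
            s -> processUserFs,           // value: the same filesystem for every entry
            (oldFs, newFs) -> newFs,      // duplicate key: keep the latest value
            LinkedHashMap::new));         // preserve encounter order
    System.out.println(statusMap);        // {a.parquet=..., b.parquet=...}
  }
}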
Use of org.apache.drill.exec.store.dfs.ReadEntryWithPath in project drill by apache.
The class AbstractParquetGroupScan, method modifyFileSelection.
// limit push down methods end
// helper method used for partition pruning and filter push down
@Override
public void modifyFileSelection(FileSelection selection) {
  super.modifyFileSelection(selection);
  List<Path> files = selection.getFiles();
  fileSet = new HashSet<>(files);
  entries = new ArrayList<>(files.size());
  entries.addAll(files.stream().map(ReadEntryWithPath::new).collect(Collectors.toList()));
  Multimap<Path, RowGroupMetadata> newRowGroups = LinkedListMultimap.create();
  if (!getRowGroupsMetadata().isEmpty()) {
    getRowGroupsMetadata().entries().stream()
        .filter(entry -> fileSet.contains(entry.getKey()))
        .forEachOrdered(entry -> newRowGroups.put(entry.getKey(), entry.getValue()));
  }
  this.rowGroups = newRowGroups;
  tableMetadata = TableMetadataUtils.updateRowCount(getTableMetadata(), getRowGroupsMetadata().values());
  if (!getFilesMetadata().isEmpty()) {
    this.files = getFilesMetadata().entrySet().stream()
        .filter(entry -> fileSet.contains(entry.getKey()))
        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
  } else {
    this.files = Collections.emptyMap();
  }
  List<PartitionMetadata> newPartitions = new ArrayList<>();
  if (!getPartitionsMetadata().isEmpty()) {
    for (PartitionMetadata entry : getPartitionsMetadata()) {
      for (Path partLocation : entry.getLocations()) {
        if (fileSet.contains(partLocation)) {
          newPartitions.add(entry);
          break;
        }
      }
    }
  }
  partitions = newPartitions;
  if (!getSegmentsMetadata().isEmpty()) {
    this.segments = getSegmentsMetadata().entrySet().stream()
        .filter(entry -> fileSet.contains(entry.getKey()))
        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
  }
  rowGroupInfos = null;
}
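The Multimap filtering step can be isolated into a small Guava sketch; the file paths, row-group indices, and class name below are hypothetical.

import java.util.Set;

import com.google.common.collect.LinkedListMultimap;
import com.google.common.collect.Multimap;

// Copies across only the row-group entries whose file is still in the retained set,
// mirroring the newRowGroups loop in modifyFileSelection above.
public class MultimapPruneSketch {
  public static void main(String[] args) {
    Multimap<String, Integer> rowGroups = LinkedListMultimap.create();
    rowGroups.put("/data/a.parquet", 0);
    rowGroups.put("/data/a.parquet", 1);
    rowGroups.put("/data/b.parquet", 0);

    Set<String> fileSet = Set.of("/data/a.parquet"); // files kept by the new selection

    Multimap<String, Integer> newRowGroups = LinkedListMultimap.create();
    rowGroups.entries().stream()
        .filter(e -> fileSet.contains(e.getKey()))
        .forEachOrdered(e -> newRowGroups.put(e.getKey(), e.getValue()));

    System.out.println(newRowGroups); // {/data/a.parquet=[0, 1]}
  }
}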