use of org.apache.drill.exec.store.TimedRunnable in project drill by apache.
the class Metadata method getParquetFileMetadata_v3.
/**
 * Builds the per-file metadata entries for a list of parquet files.
 *
 * <p>One {@code MetadataGatherer} task is created for each file status; all tasks are handed to
 * {@code TimedRunnable.run} and whatever it returns is copied into a new list.
 *
 * @param parquetTableMetadata_v3 table-level metadata object passed to every gatherer task
 * @param fileStatuses statuses of the parquet files to process
 * @return a new list containing the results returned by {@code TimedRunnable.run}
 * @throws IOException declared for failures raised while the gatherer tasks are run
 */
private List<ParquetFileMetadata_v3> getParquetFileMetadata_v3(ParquetTableMetadata_v3 parquetTableMetadata_v3, List<FileStatus> fileStatuses) throws IOException {
  List<TimedRunnable<ParquetFileMetadata_v3>> tasks = Lists.newArrayList();
  for (FileStatus fileStatus : fileStatuses) {
    tasks.add(new MetadataGatherer(parquetTableMetadata_v3, fileStatus));
  }
  // Hand back a fresh list rather than the instance produced by run().
  return Lists.newArrayList(TimedRunnable.run("Fetch parquet metadata", logger, tasks, 16));
}
use of org.apache.drill.exec.store.TimedRunnable in project drill by axbaretto.
the class FooterGatherer method getFooters.
/**
 * Collects parquet footers for the given file statuses.
 *
 * <p>A plain file gets a {@code FooterReader} task. For a directory, the summary file
 * ({@code ParquetFileWriter.PARQUET_METADATA_FILE}) is read directly when it exists; otherwise
 * every file returned by {@code DrillFileSystemUtil.listFiles} for that directory gets its own
 * {@code FooterReader} task. Queued tasks are then executed through {@code TimedRunnable.run}.
 *
 * @param conf configuration used to resolve file systems and read footers
 * @param statuses file or directory statuses to inspect
 * @param parallelism value forwarded to {@code TimedRunnable.run}
 * @return footers read from summary files followed by those produced by the reader tasks
 * @throws IOException if a file system call or footer read fails
 */
public static List<Footer> getFooters(final Configuration conf, List<FileStatus> statuses, int parallelism) throws IOException {
  final List<TimedRunnable<Footer>> footerTasks = Lists.newArrayList();
  final List<Footer> footers = Lists.newArrayList();

  for (FileStatus entry : statuses) {
    if (!entry.isDirectory()) {
      // Plain file: queue a task that reads its footer.
      footerTasks.add(new FooterReader(conf, entry));
      continue;
    }

    final Path dir = entry.getPath();
    final FileSystem fs = dir.getFileSystem(conf);

    // Prefer the summary file when the directory has one.
    final Path summaryPath = new Path(dir, ParquetFileWriter.PARQUET_METADATA_FILE);
    if (fs.exists(summaryPath)) {
      footers.addAll(ParquetFileReader.readSummaryFile(conf, fs.getFileStatus(summaryPath)));
      continue;
    }

    // No summary file: treat each listed file like a plain file.
    for (FileStatus child : DrillFileSystemUtil.listFiles(fs, dir, false)) {
      footerTasks.add(new FooterReader(conf, child));
    }
  }

  if (footerTasks.isEmpty()) {
    return footers;
  }
  footers.addAll(TimedRunnable.run("Fetch Parquet Footers", logger, footerTasks, parallelism));
  return footers;
}
use of org.apache.drill.exec.store.TimedRunnable in project drill by axbaretto.
the class BlockMapBuilder method generateFileWork.
/**
 * Produces the file work units for the given files.
 *
 * <p>A {@code BlockMapReader} task is created per file and all tasks are executed through
 * {@code TimedRunnable.run}. Each task yields its own list of work units; these lists are
 * concatenated, in the order returned, into a single result list.
 *
 * @param files statuses of the files to build work units for
 * @param blockify flag forwarded unchanged to every {@code BlockMapReader}
 * @return a single list holding the work units from all tasks
 * @throws IOException declared for failures raised while the reader tasks are run
 */
public List<CompleteFileWork> generateFileWork(List<FileStatus> files, boolean blockify) throws IOException {
  List<TimedRunnable<List<CompleteFileWork>>> blockMapTasks = Lists.newArrayList();
  for (FileStatus file : files) {
    blockMapTasks.add(new BlockMapReader(file, blockify));
  }

  // Flatten the per-task lists into one list, preserving the returned order.
  List<CompleteFileWork> flattened = Lists.newArrayList();
  for (List<CompleteFileWork> perTaskWork : TimedRunnable.run("Get block maps", logger, blockMapTasks, 16)) {
    flattened.addAll(perTaskWork);
  }
  return flattened;
}
use of org.apache.drill.exec.store.TimedRunnable in project drill by axbaretto.
the class Metadata method getParquetFileMetadata_v3.
/**
 * Gathers the file metadata for each of the supplied parquet files.
 *
 * @param parquetTableMetadata_v3 table metadata that can store column schema info from all the
 *        files and row groups; it is passed to every gatherer task
 * @param fileStatuses statuses of the parquet files to process
 *
 * @return list of the parquet file metadata with absolute paths
 * @throws IOException is thrown in case of issues while executing the list of runnables
 */
private List<ParquetFileMetadata_v3> getParquetFileMetadata_v3(ParquetTableMetadata_v3 parquetTableMetadata_v3, List<FileStatus> fileStatuses) throws IOException {
  // One gatherer task per parquet file.
  List<TimedRunnable<ParquetFileMetadata_v3>> metadataTasks = Lists.newArrayList();
  for (FileStatus parquetFile : fileStatuses) {
    metadataTasks.add(new MetadataGatherer(parquetTableMetadata_v3, parquetFile));
  }

  List<ParquetFileMetadata_v3> gathered = TimedRunnable.run("Fetch parquet metadata", logger, metadataTasks, 16);

  // Return a fresh list rather than the instance produced by run().
  List<ParquetFileMetadata_v3> result = Lists.newArrayList();
  result.addAll(gathered);
  return result;
}
Aggregations