Use of io.prestosql.plugin.hive.util.TempFileReader in project hetu-core by openlookeng.
From the class HiveWriterFactory, method mergeSubFiles:
public void mergeSubFiles(List<HiveWriter> writers)
        throws IOException
{
    if (writers.isEmpty()) {
        return;
    }

    FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), new Path(writers.get(0).getFilePath()), conf);

    List<Type> types = dataColumns.stream()
            .map(column -> column.getHiveType().getType(typeManager))
            .collect(toList());

    for (HiveWriter writer : writers) {
        String filePath = writer.getFilePath();
        Path path = new Path(filePath);
        logContainingFolderInfo(fileSystem, path, "Merging snapshot files to result file: %s", path);

        // The snapshotSuffixes list records the "resumeCount" for each suffix.
        // It doesn't have an entry for the current set of files, so an entry is added first.
        // The resumeCount helps distinguish files created by different runs.
        snapshotSuffixes.add(resumeCount);
        for (int i = 0; i < snapshotSuffixes.size(); i++) {
            long resume = snapshotSuffixes.get(i);
            Path file = new Path(toSnapshotSubFile(filePath, resume, i));
            if (fileSystem.exists(file)) {
                // TODO-cp-I2BZ0A: assuming all files to be of ORC type.
                // Using the same parameters as used by SortingFileWriter
                FileStatus fileStatus = fileSystem.getFileStatus(file);
                try (TempFileReader reader = new TempFileReader(types, new HdfsOrcDataSource(
                        new OrcDataSourceId(file.toString()),
                        fileStatus.getLen(),
                        new DataSize(1, MEGABYTE),
                        new DataSize(8, MEGABYTE),
                        new DataSize(8, MEGABYTE),
                        false,
                        fileSystem.open(file),
                        new FileFormatDataSourceStats(),
                        fileStatus.getModificationTime()))) {
                    while (reader.hasNext()) {
                        writer.append(reader.next());
                    }
                }
                // DO NOT delete the sub file, in case we need to resume. Delete them when the query finishes.
            }
        }
    }
}
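Both usages in this listing share the same read pattern: wrap a temporary ORC file in an HdfsOrcDataSource, iterate it page by page with TempFileReader, and hand each Page to a consumer. The minimal sketch below factors that pattern out. The helper name readTempFilePages is hypothetical, and the per-parameter comments on the DataSize and boolean arguments reflect conventional OrcDataSource settings; they are assumptions, not taken from the source. The constructor arguments themselves mirror the calls above.

// Minimal sketch, assuming the caller supplies the file system, file path,
// column types, and a page consumer. readTempFilePages is a hypothetical
// helper name; the parameter-name comments below are assumptions.
private static void readTempFilePages(FileSystem fileSystem, Path file, List<Type> types, Consumer<Page> consumer)
        throws IOException
{
    FileStatus fileStatus = fileSystem.getFileStatus(file);
    try (TempFileReader reader = new TempFileReader(types, new HdfsOrcDataSource(
            new OrcDataSourceId(file.toString()),
            fileStatus.getLen(),
            new DataSize(1, MEGABYTE),      // assumed: max merge distance
            new DataSize(8, MEGABYTE),      // assumed: max buffer size
            new DataSize(8, MEGABYTE),      // assumed: stream buffer size
            false,                          // assumed: lazy reading of small ranges disabled
            fileSystem.open(file),
            new FileFormatDataSourceStats(),
            fileStatus.getModificationTime()))) {
        // TempFileReader implements Iterator<Page>, so each next() yields one Page
        while (reader.hasNext()) {
            consumer.accept(reader.next());
        }
    }
}

Closing the TempFileReader via try-with-resources also closes the underlying data source, which is why the snippets above never close the HdfsOrcDataSource separately.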
Use of io.prestosql.plugin.hive.util.TempFileReader in project hetu-core by openlookeng.
From the class SortingFileWriter, method mergeFiles:
private void mergeFiles(Iterable<TempFile> files, Consumer<Page> consumer)
{
    try (Closer closer = Closer.create()) {
        Collection<Iterator<Page>> iterators = new ArrayList<>();

        for (TempFile tempFile : files) {
            Path file = tempFile.getPath();
            FileStatus fileStatus = fileSystem.getFileStatus(file);
            OrcDataSource dataSource = new HdfsOrcDataSource(
                    new OrcDataSourceId(file.toString()),
                    fileStatus.getLen(),
                    new DataSize(1, MEGABYTE),
                    new DataSize(8, MEGABYTE),
                    new DataSize(8, MEGABYTE),
                    false,
                    fileSystem.open(file),
                    new FileFormatDataSourceStats(),
                    fileStatus.getModificationTime());
            TempFileReader reader = new TempFileReader(types, dataSource);
            // Closing the reader also closes the data source
            closer.register(reader);
            iterators.add(reader);
        }

        new MergingPageIterator(iterators, types, sortFields, sortOrders).forEachRemaining(consumer);

        for (TempFile tempFile : files) {
            Path file = tempFile.getPath();
            fileSystem.delete(file, false);
            if (fileSystem.exists(file)) {
                throw new IOException("Failed to delete temporary file: " + file);
            }
        }
    }
    catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
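Here each TempFileReader becomes one input stream to MergingPageIterator, which performs a k-way merge of the pre-sorted temp files and pushes each globally sorted Page to the consumer. A hypothetical call site inside SortingFileWriter might look like the sketch below; tempFiles and outputWriter are illustrative names, not taken from the source.

// Hypothetical call-site sketch (names are illustrative, not from the source):
// forward each globally sorted Page from the k-way merge to the final writer.
mergeFiles(tempFiles, sortedPage -> outputWriter.appendRows(sortedPage));

Two details of the method are worth noting: registering every reader with the Closer before merging guarantees all data sources are closed even if the merge throws partway through, and the exists() check after delete() catches file systems where delete(Path, boolean) signals failure only through its ignored boolean return value.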