use of com.facebook.presto.hive.PartitionUpdate.FileWriteInfo in project presto by prestodb.
the class HiveWriterFactory method createWriter.
/**
 * Creates a {@link HiveWriter} for the partition identified by the row at {@code position}
 * of {@code partitionColumns} and, for bucketed tables, for the given bucket.
 *
 * <p>The file writer is taken from the first configured {@code HiveFileWriterFactory} that
 * returns one; if none does, a {@code RecordFileWriter} is used. When a sorting file writer
 * factory is configured, the chosen writer is wrapped by it.
 *
 * @param partitionColumns page holding the partition column values
 * @param position row of {@code partitionColumns} from which partition values are read
 * @param bucketNumber bucket to write; must be present exactly when a bucket count is configured,
 *         and must be smaller than that bucket count
 * @return the writer for the resolved partition/bucket
 */
public HiveWriter createWriter(Page partitionColumns, int position, OptionalInt bucketNumber) {
    // A bucket number is mandatory for bucketed tables and forbidden for all others.
    if (!bucketCount.isPresent()) {
        checkArgument(!bucketNumber.isPresent(), "Bucket number provided by for table that is not bucketed");
    } else {
        checkArgument(bucketNumber.isPresent(), "Bucket not provided for bucketed table");
        checkArgument(bucketNumber.getAsInt() < bucketCount.getAsInt(), "Bucket number %s must be less than bucket count %s", bucketNumber, bucketCount);
    }

    // The partition name exists only when the table has partition columns.
    List<String> partitionValues = createPartitionValues(partitionColumnTypes, partitionColumns, position);
    Optional<String> partitionName = partitionColumnNames.isEmpty()
            ? Optional.<String>empty()
            : Optional.of(FileUtils.makePartName(partitionColumnNames, partitionValues));

    WriterParameters writerParameters = getWriterParameters(partitionName, bucketNumber);
    Properties schema = writerParameters.getSchema();
    schema.putAll(additionalTableParameters);
    validateSchema(partitionName, writerParameters.getSchema());

    String extension = getFileExtension(writerParameters.getOutputStorageFormat(), compressionCodec);

    // Name the file will carry at its final location.
    String targetFileName;
    if (!bucketNumber.isPresent()) {
        targetFileName = queryId + "_" + randomUUID() + extension;
    } else if (isFileRenamingEnabled(session)) {
        // With file renaming enabled the bare bucket number is the file name (no extension appended).
        targetFileName = String.valueOf(bucketNumber.getAsInt());
    } else {
        targetFileName = computeBucketedFileName(queryId, bucketNumber.getAsInt()) + extension;
    }

    // Name actually written to; differs from the target name only when writing to a temp file.
    String writeFileName = writeToTempFile
            ? ".tmp.presto." + queryId + "_" + randomUUID() + extension
            : targetFileName;

    Path path = new Path(writerParameters.getWriteInfo().getWritePath(), writeFileName);

    // Ask each factory in turn; the first one that produces a writer wins.
    HiveFileWriter hiveFileWriter = null;
    for (HiveFileWriterFactory candidateFactory : fileWriterFactories) {
        Optional<HiveFileWriter> candidate = candidateFactory.createFileWriter(
                path,
                dataColumns.stream().map(DataColumn::getName).collect(toList()),
                writerParameters.getOutputStorageFormat(),
                schema,
                conf,
                session,
                encryptionInformation);
        if (!candidate.isPresent()) {
            continue;
        }
        hiveFileWriter = candidate.get();
        break;
    }

    // No factory produced a writer: fall back to the record-based writer.
    if (hiveFileWriter == null) {
        hiveFileWriter = new RecordFileWriter(
                path,
                dataColumns.stream().map(DataColumn::getName).collect(toList()),
                writerParameters.getOutputStorageFormat(),
                schema,
                partitionStorageFormat.getEstimatedWriterSystemMemoryUsage(),
                conf,
                typeManager,
                session);
    }

    if (sortingFileWriterFactory.isPresent()) {
        // File number in createSortingFileWriter() is used for determining the temporary directory to store the temporary file.
        // Limit file number for unbucketed table to have the same magnitude as bucket number
        int fileNumber = bucketNumber.orElse(abs(path.hashCode() % 1024));
        hiveFileWriter = sortingFileWriterFactory.get().createSortingFileWriter(path, hiveFileWriter, fileNumber, writerParameters.getWriteInfo().getTempPath());
    }

    boolean writeTempData = locationHandle.getTableType() == TEMPORARY || locationHandle.getTempPath().isPresent() || writeToTempFile;

    // File size is not known yet at writer-creation time, hence Optional.empty().
    FileWriteInfo fileWriteInfo = new FileWriteInfo(writeFileName, targetFileName, Optional.empty());
    String writePath = writerParameters.getWriteInfo().getWritePath().toString();
    String targetPath = writerParameters.getWriteInfo().getTargetPath().toString();
    return new HiveWriter(
            hiveFileWriter,
            partitionName,
            writerParameters.getUpdateMode(),
            fileWriteInfo,
            writePath,
            targetPath,
            createCommitEventListener(path, partitionName, hiveFileWriter, writerParameters),
            hiveWriterStats,
            writeTempData);
}
use of com.facebook.presto.hive.PartitionUpdate.FileWriteInfo in project presto by prestodb.
the class HivePageSink method updateFileInfo.
/**
 * Records the outcome of a file rename: serializes a copy of {@code partitionUpdate} whose single
 * file entry uses {@code fileName} as both write and target name, appends it to
 * {@code partitionUpdatesWithRenamedFileNames}, drops the pending result future for the writer,
 * and finally completes {@code renamingFuture}.
 *
 * @param partitionUpdatesWithRenamedFileNames output list receiving the serialized update
 * @param renamingFuture future completed (with {@code null}) once bookkeeping is done
 * @param partitionUpdate the update produced by the writer before renaming
 * @param fileName the new file name
 * @param fileWriteInfo original file entry; only its file size is carried over
 * @param writerIndex index of the writer whose result future is removed from the metadata updater
 */
private void updateFileInfo(List<Slice> partitionUpdatesWithRenamedFileNames, SettableFuture<?> renamingFuture, PartitionUpdate partitionUpdate, String fileName, FileWriteInfo fileWriteInfo, int writerIndex) {
    // After the rename, write name and target name are the same file name.
    FileWriteInfo renamedFile = new FileWriteInfo(fileName, fileName, fileWriteInfo.getFileSize());

    // Copy every other attribute of the update unchanged; the trailing flag is passed as true.
    PartitionUpdate renamedUpdate = new PartitionUpdate(
            partitionUpdate.getName(),
            partitionUpdate.getUpdateMode(),
            partitionUpdate.getWritePath(),
            partitionUpdate.getTargetPath(),
            ImmutableList.of(renamedFile),
            partitionUpdate.getRowCount(),
            partitionUpdate.getInMemoryDataSizeInBytes(),
            partitionUpdate.getOnDiskDataSizeInBytes(),
            true);

    Slice serializedUpdate = wrappedBuffer(partitionUpdateCodec.toJsonBytes(renamedUpdate));
    partitionUpdatesWithRenamedFileNames.add(serializedUpdate);

    hiveMetadataUpdater.removeResultFuture(writerIndex);

    // Completing the future is the last step so observers see the list already updated.
    renamingFuture.set(null);
}
use of com.facebook.presto.hive.PartitionUpdate.FileWriteInfo in project presto by prestodb.
the class HivePageSink method renameFiles.
/**
 * Asynchronously renames the single file written by the writer at {@code writerIndex} to
 * {@code fileName} (within the same write path) and, once the rename succeeds, records the
 * renamed file via {@code updateFileInfo}, which completes {@code renamingFuture}.
 *
 * <p>If the asynchronous rename fails or is cancelled, {@code renamingFuture} is completed
 * exceptionally with a {@link PrestoException} ({@code HIVE_FILESYSTEM_ERROR}) so that anything
 * waiting on it observes the failure instead of waiting forever.
 *
 * @param fileName new name for the written file
 * @param writerIndex index into {@code writers} of the writer whose file is renamed
 * @param renamingFuture future completed when the rename and its bookkeeping finish, or failed
 *         when the asynchronous rename fails
 * @param partitionUpdatesWithRenamedFileNames output list receiving the serialized, renamed update
 * @throws IllegalArgumentException if the writer reports anything other than exactly one file
 * @throws PrestoException if obtaining the file system or starting the rename throws {@link IOException}
 */
private void renameFiles(String fileName, int writerIndex, SettableFuture<?> renamingFuture, List<Slice> partitionUpdatesWithRenamedFileNames) {
    HdfsContext context = new HdfsContext(session, schemaName, tableName, writerFactory.getLocationHandle().getTargetPath().toString(), writerFactory.isCreateTable());
    HiveWriter writer = writers.get(writerIndex);
    PartitionUpdate partitionUpdate = writer.getPartitionUpdate();
    // Check that only one file is written by a writer
    checkArgument(partitionUpdate.getFileWriteInfos().size() == 1, "HiveWriter wrote data to more than one file");
    FileWriteInfo fileWriteInfo = partitionUpdate.getFileWriteInfos().get(0);
    Path fromPath = new Path(partitionUpdate.getWritePath(), fileWriteInfo.getWriteFileName());
    Path toPath = new Path(partitionUpdate.getWritePath(), fileName);
    try {
        ExtendedFileSystem fileSystem = hdfsEnvironment.getFileSystem(context, fromPath);
        ListenableFuture<Void> asyncFuture = fileSystem.renameFileAsync(fromPath, toPath);
        // The listener fires on success, failure and cancellation alike. It runs on the thread
        // that completes the future (or immediately, if the future is already done).
        asyncFuture.addListener(() -> {
            try {
                // The future is already complete here; get() only surfaces its outcome.
                asyncFuture.get();
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    Thread.currentThread().interrupt();
                }
                Throwable cause = (e instanceof java.util.concurrent.ExecutionException && e.getCause() != null) ? e.getCause() : e;
                // Without this, a failed rename would leave renamingFuture incomplete forever.
                renamingFuture.setException(new PrestoException(HIVE_FILESYSTEM_ERROR, format("Error renaming file. fromPath: %s toPath: %s", fromPath, toPath), cause));
                return;
            }
            updateFileInfo(partitionUpdatesWithRenamedFileNames, renamingFuture, partitionUpdate, fileName, fileWriteInfo, writerIndex);
        }, Runnable::run);
    } catch (IOException e) {
        throw new PrestoException(HIVE_FILESYSTEM_ERROR, format("Error renaming file. fromPath: %s toPath: %s", fromPath, toPath), e);
    }
}
use of com.facebook.presto.hive.PartitionUpdate.FileWriteInfo in project presto by prestodb.
the class HiveMetadata method computePartitionUpdatesForMissingBuckets.
/**
 * Builds {@link PartitionUpdate}s that describe zero-length files for buckets that received no data.
 *
 * <p>Returns an empty list when {@code shouldCreateFilesForMissingBuckets} says no files should be
 * created. For an un-partitioned table with no partition updates, a single update is returned:
 * empty name, {@code APPEND} for an insert handle and {@code NEW} otherwise, with paths from the
 * handle's location handle. Otherwise one update is returned per input update, listing only the
 * bucket files that the input update does not already contain.
 *
 * @param session session used for file-name computation and the file-renaming flag
 * @param handle table handle supplying storage formats, compression codec, bucket property and location
 * @param table table being written
 * @param partitionUpdates updates already produced by the writers
 * @return updates whose file entries all have size 0 and row/data-size counters of 0
 */
private List<PartitionUpdate> computePartitionUpdatesForMissingBuckets(ConnectorSession session, HiveWritableTableHandle handle, Table table, List<PartitionUpdate> partitionUpdates) {
    // avoid creation of PartitionUpdate with empty list of files
    if (!shouldCreateFilesForMissingBuckets(table, session)) {
        return ImmutableList.of();
    }

    boolean unpartitioned = table.getPartitionColumns().isEmpty();
    HiveStorageFormat storageFormat = unpartitioned ? handle.getTableStorageFormat() : handle.getPartitionStorageFormat();

    // empty un-partitioned bucketed table
    if (unpartitioned && partitionUpdates.isEmpty()) {
        int bucketCount = handle.getBucketProperty().get().getBucketCount();
        LocationHandle locationHandle = handle.getLocationHandle();
        // No files exist yet, so every bucket is missing.
        List<String> missingFileNames = computeFileNamesForMissingBuckets(session, storageFormat, handle.getCompressionCodec(), bucketCount, ImmutableSet.of());
        ImmutableList<FileWriteInfo> emptyFiles = missingFileNames.stream()
                .map(name -> new FileWriteInfo(name, name, Optional.of(0L)))
                .collect(toImmutableList());
        PartitionUpdate tableUpdate = new PartitionUpdate(
                "",
                (handle instanceof HiveInsertTableHandle) ? APPEND : NEW,
                locationHandle.getWritePath(),
                locationHandle.getTargetPath(),
                emptyFiles,
                0,
                0,
                0,
                isFileRenamingEnabled(session));
        return ImmutableList.of(tableUpdate);
    }

    // One complementary update per existing update, covering only the buckets it lacks.
    return partitionUpdates.stream()
            .map(existing -> {
                int bucketCount = handle.getBucketProperty().get().getBucketCount();
                List<String> missingFileNames = computeFileNamesForMissingBuckets(
                        session,
                        storageFormat,
                        handle.getCompressionCodec(),
                        bucketCount,
                        ImmutableSet.copyOf(getTargetFileNames(existing.getFileWriteInfos())));
                ImmutableList<FileWriteInfo> emptyFiles = missingFileNames.stream()
                        .map(name -> new FileWriteInfo(name, name, Optional.of(0L)))
                        .collect(toImmutableList());
                return new PartitionUpdate(
                        existing.getName(),
                        existing.getUpdateMode(),
                        existing.getWritePath(),
                        existing.getTargetPath(),
                        emptyFiles,
                        0,
                        0,
                        0,
                        isFileRenamingEnabled(session));
            })
            .collect(toImmutableList());
}
use of com.facebook.presto.hive.PartitionUpdate.FileWriteInfo in project presto by prestodb.
the class HiveManifestUtils method createPartitionManifest.
/**
 * Builds a two-column manifest page (file name, file size) with one row per file recorded in
 * {@code partitionUpdate}.
 *
 * @param partitionUpdate update whose file entries are listed
 * @return the manifest page, or {@link Optional#empty()} as soon as an entry without a file size
 *         is encountered
 */
public static Optional<Page> createPartitionManifest(PartitionUpdate partitionUpdate) {
    // Manifest Page layout:
    //   fileName (VARCHAR)   fileSize (BIGINT)
    //   X                    X
    //   X                    X
    //   ....
    PageBuilder page = new PageBuilder(ImmutableList.of(VARCHAR, BIGINT));
    BlockBuilder names = page.getBlockBuilder(0);
    BlockBuilder sizes = page.getBlockBuilder(1);

    for (FileWriteInfo entry : partitionUpdate.getFileWriteInfos()) {
        Optional<Long> size = entry.getFileSize();
        if (!size.isPresent()) {
            // A manifest with an unknown size is not emitted at all.
            return Optional.empty();
        }
        page.declarePosition();
        // The manifest records the name the file was written under.
        VARCHAR.writeSlice(names, utf8Slice(entry.getWriteFileName()));
        BIGINT.writeLong(sizes, size.get());
    }

    return Optional.of(page.build());
}
Aggregations