Use of io.trino.plugin.hive.RecordFileWriter in project trino by trinodb.
The write method of the class CheckpointWriter:
public void write(ConnectorSession session, CheckpointEntries entries, Path targetPath) {
    RowType metadataEntryType = checkpointSchemaManager.getMetadataEntryType();
    RowType protocolEntryType = checkpointSchemaManager.getProtocolEntryType();
    RowType txnEntryType = checkpointSchemaManager.getTxnEntryType();
    RowType addEntryType = checkpointSchemaManager.getAddEntryType(entries.getMetadataEntry());
    RowType removeEntryType = checkpointSchemaManager.getRemoveEntryType();

    List<String> columnNames = ImmutableList.of("metaData", "protocol", "txn", "add", "remove");
    List<Type> columnTypes = ImmutableList.of(metadataEntryType, protocolEntryType, txnEntryType, addEntryType, removeEntryType);
    Properties schema = buildSchemaProperties(columnNames, columnTypes);

    Configuration conf = hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session), targetPath);
    configureCompression(conf, SNAPPY);
    JobConf jobConf = toJobConf(conf);

    RecordFileWriter writer = new RecordFileWriter(
            targetPath,
            columnNames,
            fromHiveStorageFormat(PARQUET),
            schema,
            PARQUET.getEstimatedWriterMemoryUsage(),
            jobConf,
            typeManager,
            DateTimeZone.UTC,
            session);

    PageBuilder pageBuilder = new PageBuilder(columnTypes);

    writeMetadataEntry(pageBuilder, metadataEntryType, entries.getMetadataEntry());
    writeProtocolEntry(pageBuilder, protocolEntryType, entries.getProtocolEntry());
    for (TransactionEntry transactionEntry : entries.getTransactionEntries()) {
        writeTransactionEntry(pageBuilder, txnEntryType, transactionEntry);
    }
    for (AddFileEntry addFileEntry : entries.getAddFileEntries()) {
        writeAddFileEntry(pageBuilder, addEntryType, addFileEntry);
    }
    for (RemoveFileEntry removeFileEntry : entries.getRemoveFileEntries()) {
        writeRemoveFileEntry(pageBuilder, removeEntryType, removeFileEntry);
    }
    // Not writing commit infos for now. DB does not keep them in the checkpoints by default
    writer.appendRows(pageBuilder.build());
    writer.commit();
}
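Both snippets hand RecordFileWriter a Properties object that describes the file schema. The actual Trino helpers are not shown here; as a minimal sketch, such a helper could populate the standard Hive serde keys "columns" and "columns.types". The toHiveTypeName helper below is hypothetical, standing in for whatever Trino-type-to-Hive-type mapping the real code uses.

// Minimal sketch only, not the actual Trino implementation of buildSchemaProperties.
// Assumes the standard Hive serde schema keys "columns" and "columns.types";
// toHiveTypeName is a hypothetical helper mapping a Trino Type to its Hive type string.
private static Properties buildSchemaProperties(List<String> columnNames, List<Type> columnTypes) {
    Properties schema = new Properties();
    // Comma-separated column names, e.g. "metaData,protocol,txn,add,remove"
    schema.setProperty("columns", String.join(",", columnNames));
    // Colon-separated Hive type names, one per column
    schema.setProperty("columns.types", columnTypes.stream()
            .map(CheckpointWriter::toHiveTypeName)
            .collect(java.util.stream.Collectors.joining(":")));
    return schema;
}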
Use of io.trino.plugin.hive.RecordFileWriter in project trino by trinodb.
The createWriter method of the class DeltaLakeUpdatablePageSource:
private DeltaLakeWriter createWriter(Path targetFile, List<ColumnMetadata> allColumns, List<DeltaLakeColumnHandle> dataColumns) throws IOException {
    Configuration conf = hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session), targetFile);
    configureCompression(conf, SNAPPY);

    Properties schema = DeltaLakePageSink.buildSchemaProperties(
            dataColumns.stream().map(DeltaLakeColumnHandle::getName).collect(toImmutableList()),
            dataColumns.stream().map(DeltaLakeColumnHandle::getType).collect(toImmutableList()));

    RecordFileWriter recordFileWriter = new RecordFileWriter(
            targetFile,
            dataColumns.stream().map(DeltaLakeColumnHandle::getName).collect(toImmutableList()),
            fromHiveStorageFormat(PARQUET),
            schema,
            PARQUET.getEstimatedWriterMemoryUsage(),
            toJobConf(conf),
            typeManager,
            DateTimeZone.UTC,
            session);

    Path tablePath = new Path(tableHandle.getLocation());
    Path relativePath = new Path(tablePath.toUri().relativize(targetFile.toUri()));
    List<String> partitionValueList = getPartitionValues(allColumns.stream()
            .filter(columnMetadata -> partitionKeys.containsKey(columnMetadata.getName()))
            .collect(toImmutableList()));

    return new DeltaLakeWriter(
            hdfsEnvironment.getFileSystem(hdfsContext, targetFile),
            recordFileWriter,
            tablePath,
            relativePath.toString(),
            partitionValueList,
            new DeltaLakeWriterStats(),
            dataColumns);
}
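The relativePath computation relies on java.net.URI#relativize: relativizing the target file's URI against the table root URI yields the file's path inside the table, which is what the Delta log records. A small standalone illustration (the bucket and file names are made up):

// Standalone demo of the relative-path computation; locations are hypothetical.
import org.apache.hadoop.fs.Path;

public class RelativePathDemo {
    public static void main(String[] args) {
        Path tablePath = new Path("s3://bucket/warehouse/my_table");
        Path targetFile = new Path("s3://bucket/warehouse/my_table/part-00000.snappy.parquet");

        // URI.relativize strips the table-root prefix, leaving the path within the table
        Path relativePath = new Path(tablePath.toUri().relativize(targetFile.toUri()));
        System.out.println(relativePath); // prints: part-00000.snappy.parquet
    }
}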