Use of io.trino.plugin.hive.HiveStorageFormat.PARQUET in project trino by trinodb.
From the class DeltaLakePageSink, method createParquetFileWriter:
private FileWriter createParquetFileWriter(Path path)
{
    ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder()
            .setMaxBlockSize(getParquetWriterBlockSize(session))
            .setMaxPageSize(getParquetWriterPageSize(session))
            .build();
    CompressionCodecName compressionCodecName = getCompressionCodec(session).getParquetCompressionCodec();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, conf);
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        List<Type> parquetTypes = dataColumnTypes.stream()
                .map(type -> {
                    if (type instanceof TimestampWithTimeZoneType) {
                        verify(((TimestampWithTimeZoneType) type).getPrecision() == 3, "Unsupported type: %s", type);
                        return TIMESTAMP_MILLIS;
                    }
                    return type;
                })
                .collect(toImmutableList());
        // we use identity column mapping; the input page already contains only data columns, per
        // DeltaLakePageSink.getDataPage()
        int[] identityMapping = new int[dataColumnTypes.size()];
        for (int i = 0; i < identityMapping.length; ++i) {
            identityMapping[i] = i;
        }
        ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter(parquetTypes, dataColumnNames);
        return new ParquetFileWriter(
                fileSystem.create(path),
                rollbackAction,
                parquetTypes,
                schemaConverter.getMessageType(),
                schemaConverter.getPrimitiveTypes(),
                parquetWriterOptions,
                identityMapping,
                compressionCodecName,
                trinoVersion);
    }
    catch (IOException e) {
        throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Error creating Parquet file", e);
    }
}
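The writer options above are derived from session properties (getParquetWriterBlockSize and getParquetWriterPageSize). As a minimal standalone sketch, assuming the io.trino.parquet.writer.ParquetWriterOptions builder and io.airlift.units.DataSize are available on the classpath, the same options could be built with fixed sizes instead:

// Minimal standalone sketch (not from the Trino source): building ParquetWriterOptions
// with fixed sizes instead of the session-derived values used above.
import io.airlift.units.DataSize;
import io.trino.parquet.writer.ParquetWriterOptions;

import static io.airlift.units.DataSize.Unit.MEGABYTE;

public class ParquetWriterOptionsSketch
{
    public static ParquetWriterOptions fixedOptions()
    {
        // hypothetical fixed sizes; DeltaLakePageSink reads them from the session instead
        return ParquetWriterOptions.builder()
                .setMaxBlockSize(DataSize.of(128, MEGABYTE))
                .setMaxPageSize(DataSize.of(1, MEGABYTE))
                .build();
    }
}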
Use of io.trino.plugin.hive.HiveStorageFormat.PARQUET in project trino by trinodb.
From the class TestHiveFileFormats, method testParquetPageSourceSchemaEvolution:
@Test(dataProvider = "rowCount")
public void testParquetPageSourceSchemaEvolution(int rowCount)
        throws Exception
{
    List<TestColumn> writeColumns = getTestColumnsSupportedByParquet();

    // test index-based access
    List<TestColumn> readColumns = writeColumns.stream()
            .map(column -> new TestColumn(column.getName() + "_new", column.getObjectInspector(), column.getWriteValue(), column.getExpectedValue(), column.isPartitionKey()))
            .collect(toList());
    assertThatFileFormat(PARQUET)
            .withWriteColumns(writeColumns)
            .withReadColumns(readColumns)
            .withSession(PARQUET_SESSION)
            .withRowsCount(rowCount)
            .isReadableByPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()));

    // test name-based access
    readColumns = Lists.reverse(writeColumns);
    assertThatFileFormat(PARQUET)
            .withWriteColumns(writeColumns)
            .withReadColumns(readColumns)
            .withSession(PARQUET_SESSION_USE_NAME)
            .isReadableByPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()));
}
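The two assertions differ only in how read columns are resolved against the file schema: the first renames every column (appending "_new") so resolution must fall back to ordinal position, while the second reverses the column order so resolution must happen by name (PARQUET_SESSION_USE_NAME enables the name-based mode). A minimal standalone sketch, using hypothetical helpers that are not part of TestHiveFileFormats, contrasting the two strategies:

// Hypothetical illustration of index-based vs. name-based column resolution;
// plain Java, independent of the Trino test harness above.
import java.util.List;

public class ColumnResolutionSketch
{
    // index-based: position decides, so a renamed read column still maps to the same file column
    static String resolveByIndex(List<String> fileColumns, int readPosition)
    {
        return fileColumns.get(readPosition);
    }

    // name-based: the name decides, so reordering the read columns is harmless
    static String resolveByName(List<String> fileColumns, String readName)
    {
        return fileColumns.stream()
                .filter(readName::equalsIgnoreCase)
                .findFirst()
                .orElseThrow(() -> new IllegalArgumentException("no such column: " + readName));
    }

    public static void main(String[] args)
    {
        List<String> fileColumns = List.of("id", "name", "ts");
        System.out.println(resolveByIndex(fileColumns, 1)); // "name", even if the reader calls it "name_new"
        System.out.println(resolveByName(fileColumns, "TS")); // "ts", regardless of its position
    }
}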