Use of io.trino.plugin.hive.parquet.ParquetReaderConfig in project trino by trinodb.
From class TestHiveFileFormats, method testParquetPageSourceSchemaEvolution.
@Test(dataProvider = "rowCount")
public void testParquetPageSourceSchemaEvolution(int rowCount) throws Exception {
    List<TestColumn> writeColumns = getTestColumnsSupportedByParquet();
    // test index-based access
    List<TestColumn> readColumns = writeColumns.stream()
            .map(column -> new TestColumn(column.getName() + "_new", column.getObjectInspector(), column.getWriteValue(), column.getExpectedValue(), column.isPartitionKey()))
            .collect(toList());
    assertThatFileFormat(PARQUET)
            .withWriteColumns(writeColumns).withReadColumns(readColumns)
            .withSession(PARQUET_SESSION).withRowsCount(rowCount)
            .isReadableByPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()));
    // test name-based access
    readColumns = Lists.reverse(writeColumns);
    assertThatFileFormat(PARQUET)
            .withWriteColumns(writeColumns).withReadColumns(readColumns)
            .withSession(PARQUET_SESSION_USE_NAME)
            .isReadableByPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()));
}
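The same factory constructor also accepts a non-default ParquetReaderConfig. A minimal sketch, assuming this Trino version exposes the fluent setters setUseColumnIndex (parquet.use-column-index) and setMaxReadBlockSize (parquet.max-read-block-size); the values are illustrative only:

    ParquetReaderConfig tunedConfig = new ParquetReaderConfig()
            .setUseColumnIndex(true) // assumption: fluent setter exists and maps to parquet.use-column-index
            .setMaxReadBlockSize(DataSize.of(8, DataSize.Unit.MEGABYTE)); // assumption: fluent setter; io.airlift.units.DataSize
    ParquetPageSourceFactory tunedFactory =
            new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, tunedConfig, new HiveConfig());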
Use of io.trino.plugin.hive.parquet.ParquetReaderConfig in project trino by trinodb.
From class TestHiveFileFormats, method testTruncateVarcharColumn.
@Test
public void testTruncateVarcharColumn() throws Exception {
    TestColumn writeColumn = new TestColumn("varchar_column",
            getPrimitiveJavaObjectInspector(new VarcharTypeInfo(4)), new HiveVarchar("test", 4), utf8Slice("test"));
    TestColumn readColumn = new TestColumn("varchar_column",
            getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)), new HiveVarchar("tes", 3), utf8Slice("tes"));
    assertThatFileFormat(RCTEXT).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn))
            .isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()))
            .isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(RCBINARY).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn))
            .isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()))
            .isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(ORC).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn))
            .isReadableByPageSource(new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC));
    assertThatFileFormat(PARQUET).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn))
            .withSession(PARQUET_SESSION)
            .isReadableByPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()));
    assertThatFileFormat(AVRO).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn))
            .isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(SEQUENCEFILE).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn))
            .isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(TEXTFILE).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn))
            .isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
}
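The write/read pair above narrows varchar(4) to varchar(3), so the expected value is the truncated "tes". The complementary widening case would use the same TestColumn constructor with a wider read type; a hypothetical sketch (not part of the original test), assuming no truncation occurs when the read type is wider than the written data:

    TestColumn widenedReadColumn = new TestColumn(
            "varchar_column",
            getPrimitiveJavaObjectInspector(new VarcharTypeInfo(10)), // hypothetical wider read type
            new HiveVarchar("test", 10),
            utf8Slice("test")); // assumption: the full value is read back unchanged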
Use of io.trino.plugin.hive.parquet.ParquetReaderConfig in project trino by trinodb.
From class TestHiveFileFormats, method testParquetPageSourceGzip.
@Test(dataProvider = "validRowAndFileSizePadding")
public void testParquetPageSourceGzip(int rowCount, long fileSizePadding) throws Exception {
    List<TestColumn> testColumns = getTestColumnsSupportedByParquet();
    assertThatFileFormat(PARQUET)
            .withColumns(testColumns).withSession(PARQUET_SESSION)
            .withCompressionCodec(HiveCompressionCodec.GZIP)
            .withFileSizePadding(fileSizePadding).withRowsCount(rowCount)
            .isReadableByPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()));
}
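The same harness calls could exercise another codec; a sketch assuming HiveCompressionCodec.SNAPPY is handled identically by this test setup:

    assertThatFileFormat(PARQUET)
            .withColumns(testColumns).withSession(PARQUET_SESSION)
            .withCompressionCodec(HiveCompressionCodec.SNAPPY) // assumption: SNAPPY is exercised the same way as GZIP here
            .withFileSizePadding(fileSizePadding).withRowsCount(rowCount)
            .isReadableByPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()));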
Use of io.trino.plugin.hive.parquet.ParquetReaderConfig in project trino by trinodb.
From class TestCheckpointWriter, method readCheckpoint.
private CheckpointEntries readCheckpoint(Path checkpointPath, MetadataEntry metadataEntry, boolean rowStatisticsEnabled) throws IOException {
    FileSystem fileSystem = hdfsEnvironment.getFileSystem(new HdfsEnvironment.HdfsContext(session), checkpointPath);
    FileStatus fileStatus = fileSystem.getFileStatus(checkpointPath);
    Iterator<DeltaLakeTransactionLogEntry> checkpointEntryIterator = new CheckpointEntryIterator(
            checkpointPath,
            session,
            fileStatus.getLen(),
            checkpointSchemaManager,
            typeManager,
            ImmutableSet.of(METADATA, PROTOCOL, TRANSACTION, ADD, REMOVE),
            Optional.of(metadataEntry),
            hdfsEnvironment,
            new FileFormatDataSourceStats(),
            new ParquetReaderConfig().toParquetReaderOptions(),
            rowStatisticsEnabled);
    CheckpointBuilder checkpointBuilder = new CheckpointBuilder();
    while (checkpointEntryIterator.hasNext()) {
        DeltaLakeTransactionLogEntry entry = checkpointEntryIterator.next();
        checkpointBuilder.addLogEntry(entry);
    }
    return checkpointBuilder.build();
}
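Here ParquetReaderConfig only serves to produce the ParquetReaderOptions passed to CheckpointEntryIterator. A tuned variant could be built the same way; a sketch assuming the fluent setMaxReadBlockSize setter (parquet.max-read-block-size) exists on this version, with an illustrative value:

    ParquetReaderOptions checkpointReadOptions = new ParquetReaderConfig()
            .setMaxReadBlockSize(DataSize.of(16, DataSize.Unit.MEGABYTE)) // assumption: fluent setter; io.airlift.units.DataSize
            .toParquetReaderOptions();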
Use of io.trino.plugin.hive.parquet.ParquetReaderConfig in project trino by trinodb.
From class TestTransactionLogAccess, method setupTransactionLogAccess.
private void setupTransactionLogAccess(String tableName, Path tableLocation) throws IOException {
    TestingConnectorContext context = new TestingConnectorContext();
    TypeManager typeManager = context.getTypeManager();
    HdfsConfig hdfsConfig = new HdfsConfig();
    HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hdfsConfig), ImmutableSet.of());
    HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hdfsConfig, new NoHdfsAuthentication());
    FileFormatDataSourceStats fileFormatDataSourceStats = new FileFormatDataSourceStats();
    transactionLogAccess = new TrackingTransactionLogAccess(
            tableName, tableLocation, SESSION, typeManager, new CheckpointSchemaManager(typeManager),
            new DeltaLakeConfig(), fileFormatDataSourceStats, hdfsEnvironment, new ParquetReaderConfig());
    DeltaLakeTableHandle tableHandle = new DeltaLakeTableHandle(
            "schema", tableName, "location",
            Optional.empty(), // ignored
            TupleDomain.none(), TupleDomain.none(),
            Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(),
            0);
    tableSnapshot = transactionLogAccess.loadSnapshot(tableHandle.getSchemaTableName(), tableLocation, SESSION);
}
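The ParquetReaderConfig passed here governs how TrackingTransactionLogAccess reads checkpoint Parquet files. A non-default config could be supplied the same way; a sketch assuming the fluent setIgnoreStatistics setter (parquet.ignore-statistics) exists on this version:

    ParquetReaderConfig checkpointParquetConfig = new ParquetReaderConfig()
            .setIgnoreStatistics(true); // assumption: fluent setter exists and maps to parquet.ignore-statistics
    transactionLogAccess = new TrackingTransactionLogAccess(
            tableName, tableLocation, SESSION, typeManager, new CheckpointSchemaManager(typeManager),
            new DeltaLakeConfig(), fileFormatDataSourceStats, hdfsEnvironment, checkpointParquetConfig);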