Use of io.trino.plugin.hive.parquet.ParquetPageSourceFactory in project trino by trinodb.
From class TestHiveFileFormats, method testParquetPageSource.
@Test(dataProvider = "validRowAndFileSizePadding")
public void testParquetPageSource(int rowCount, long fileSizePadding) throws Exception {
    List<TestColumn> testColumns = getTestColumnsSupportedByParquet();
    assertThatFileFormat(PARQUET)
            .withColumns(testColumns)
            .withSession(PARQUET_SESSION)
            .withRowsCount(rowCount)
            .withFileSizePadding(fileSizePadding)
            .isReadableByPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()));
}
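Every assertion in these examples constructs the factory the same way. As a point of reference, here is a minimal sketch of that wiring with the constructor arguments annotated; HDFS_ENVIRONMENT and STATS are constants provided by the TestHiveFileFormats harness, and the argument roles are inferred from how the tests use them rather than from ParquetPageSourceFactory's documentation.

// Minimal sketch of the factory wiring reused throughout these tests.
ParquetPageSourceFactory pageSourceFactory = new ParquetPageSourceFactory(
        HDFS_ENVIRONMENT,           // file system access used to open the written test files (harness constant)
        STATS,                      // read statistics collected by the harness (harness constant)
        new ParquetReaderConfig(),  // default Parquet reader settings
        new HiveConfig());          // default Hive connector settings

assertThatFileFormat(PARQUET)
        .withColumns(getTestColumnsSupportedByParquet())
        .withSession(PARQUET_SESSION)
        .isReadableByPageSource(pageSourceFactory);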
Use of io.trino.plugin.hive.parquet.ParquetPageSourceFactory in project trino by trinodb.
From class TestHiveFileFormats, method testParquetProjectedColumns.
@Test(dataProvider = "rowCount")
public void testParquetProjectedColumns(int rowCount) throws Exception {
    List<TestColumn> supportedColumns = getTestColumnsSupportedByParquet();
    List<TestColumn> regularColumns = getRegularColumns(supportedColumns);
    List<TestColumn> partitionColumns = getPartitionColumns(supportedColumns);

    // Create projected columns for all supported regular columns
    ImmutableList.Builder<TestColumn> writeColumnsBuilder = ImmutableList.builder();
    ImmutableList.Builder<TestColumn> readColumnsBuilder = ImmutableList.builder();
    generateProjectedColumns(regularColumns, writeColumnsBuilder, readColumnsBuilder);

    List<TestColumn> writeColumns = writeColumnsBuilder.addAll(partitionColumns).build();
    List<TestColumn> readColumns = readColumnsBuilder.addAll(partitionColumns).build();

    assertThatFileFormat(PARQUET)
            .withWriteColumns(writeColumns)
            .withReadColumns(readColumns)
            .withRowsCount(rowCount)
            .withSession(PARQUET_SESSION)
            .isReadableByPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()));
    assertThatFileFormat(PARQUET)
            .withWriteColumns(writeColumns)
            .withReadColumns(readColumns)
            .withRowsCount(rowCount)
            .withSession(PARQUET_SESSION_USE_NAME)
            .isReadableByPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()));
}
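PARQUET_SESSION and PARQUET_SESSION_USE_NAME differ in whether the reader matches Hive columns to Parquet columns by position or by name. A hedged sketch of how a name-based session could be built with the getHiveSession helper used elsewhere in this class, reusing the writeColumns and readColumns lists from the snippet above; setUseParquetColumnNames is an assumed HiveConfig setter (corresponding to hive.parquet.use-column-names), so verify it against your Trino version.

// Hedged sketch: a session with name-based Parquet column lookup enabled.
// setUseParquetColumnNames is an assumption about the HiveConfig API.
ConnectorSession useColumnNamesSession = getHiveSession(new HiveConfig().setUseParquetColumnNames(true));

assertThatFileFormat(PARQUET)
        .withWriteColumns(writeColumns)
        .withReadColumns(readColumns)
        .withSession(useColumnNamesSession)
        .isReadableByPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()));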
Use of io.trino.plugin.hive.parquet.ParquetPageSourceFactory in project trino by trinodb.
From class TestHiveFileFormats, method testOptimizedParquetWriter.
@Test(dataProvider = "rowCount")
public void testOptimizedParquetWriter(int rowCount) throws Exception {
    ConnectorSession session = getHiveSession(new HiveConfig(), new ParquetWriterConfig().setParquetOptimizedWriterEnabled(true));
    assertTrue(HiveSessionProperties.isParquetOptimizedWriterEnabled(session));

    List<TestColumn> testColumns = getTestColumnsSupportedByParquet();
    assertThatFileFormat(PARQUET)
            .withSession(session)
            .withColumns(testColumns)
            .withRowsCount(rowCount)
            .withFileWriterFactory(new ParquetFileWriterFactory(HDFS_ENVIRONMENT, new NodeVersion("test-version"), TESTING_TYPE_MANAGER))
            .isReadableByPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()));
}
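The optimized writer is switched on explicitly through ParquetWriterConfig and then verified on the session before writing. As a counterpart, a hedged sketch asserting that a session built from default configs leaves the property off; this assumes the optimized writer is opt-in (disabled by default) in this Trino version.

// Hedged sketch: with default configs the optimized-writer session property is expected to be off
// (assumes the feature is disabled by default in this Trino version).
ConnectorSession defaultSession = getHiveSession(new HiveConfig(), new ParquetWriterConfig());
assertFalse(HiveSessionProperties.isParquetOptimizedWriterEnabled(defaultSession));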
Use of io.trino.plugin.hive.parquet.ParquetPageSourceFactory in project trino by trinodb.
From class TestHiveFileFormats, method testFailForLongVarcharPartitionColumn.
@Test
public void testFailForLongVarcharPartitionColumn() throws Exception {
    TestColumn partitionColumn = new TestColumn("partition_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)), "test", utf8Slice("tes"), true);
    TestColumn varcharColumn = new TestColumn("varchar_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)), new HiveVarchar("tes", 3), utf8Slice("tes"));
    List<TestColumn> columns = ImmutableList.of(partitionColumn, varcharColumn);

    HiveErrorCode expectedErrorCode = HiveErrorCode.HIVE_INVALID_PARTITION_VALUE;
    String expectedMessage = "Invalid partition value 'test' for varchar(3) partition key: partition_column";

    assertThatFileFormat(RCTEXT)
            .withColumns(columns)
            .isFailingForPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()), expectedErrorCode, expectedMessage)
            .isFailingForRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(RCBINARY)
            .withColumns(columns)
            .isFailingForPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()), expectedErrorCode, expectedMessage)
            .isFailingForRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(ORC)
            .withColumns(columns)
            .isFailingForPageSource(new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC), expectedErrorCode, expectedMessage);
    assertThatFileFormat(PARQUET)
            .withColumns(columns)
            .withSession(PARQUET_SESSION)
            .isFailingForPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()), expectedErrorCode, expectedMessage);
    assertThatFileFormat(SEQUENCEFILE)
            .withColumns(columns)
            .isFailingForRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(TEXTFILE)
            .withColumns(columns)
            .isFailingForRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
}
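The failure is driven entirely by the partition value "test" exceeding the declared varchar(3) length. For orientation, a hedged sketch of the same TestColumn constructor with its arguments annotated; the roles are inferred from the getters used in the schema-evolution example below (getName, getObjectInspector, getWriteValue, getExpectedValue, isPartitionKey), not from documentation.

// Hedged sketch: the oversized partition column that triggers HIVE_INVALID_PARTITION_VALUE.
// Argument roles are inferred from the TestColumn getters used elsewhere in this class.
TestColumn oversizedPartitionColumn = new TestColumn(
        "partition_column",                                      // column name
        getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)), // declared Hive type: varchar(3)
        "test",                                                  // partition value to write: four characters, too long
        utf8Slice("tes"),                                        // value the reader would otherwise expect
        true);                                                   // marks this column as a partition key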
Use of io.trino.plugin.hive.parquet.ParquetPageSourceFactory in project trino by trinodb.
From class TestHiveFileFormats, method testParquetPageSourceSchemaEvolution.
@Test(dataProvider = "rowCount")
public void testParquetPageSourceSchemaEvolution(int rowCount) throws Exception {
    List<TestColumn> writeColumns = getTestColumnsSupportedByParquet();

    // test index-based access
    List<TestColumn> readColumns = writeColumns.stream()
            .map(column -> new TestColumn(column.getName() + "_new", column.getObjectInspector(), column.getWriteValue(), column.getExpectedValue(), column.isPartitionKey()))
            .collect(toList());
    assertThatFileFormat(PARQUET)
            .withWriteColumns(writeColumns)
            .withReadColumns(readColumns)
            .withSession(PARQUET_SESSION)
            .withRowsCount(rowCount)
            .isReadableByPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()));

    // test name-based access
    readColumns = Lists.reverse(writeColumns);
    assertThatFileFormat(PARQUET)
            .withWriteColumns(writeColumns)
            .withReadColumns(readColumns)
            .withSession(PARQUET_SESSION_USE_NAME)
            .isReadableByPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()));
}
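Name-based access should make the reader independent of column order, which is what the reversed read-column list exercises. A further hedged sketch, assuming the harness also accepts a read-column list that is a subset of the written columns (as the projected-columns example above suggests), and reusing the writeColumns list from the snippet above.

// Hedged sketch: name-based access reading only the first written column; assumes the harness
// supports read columns that are a strict subset of the write columns.
assertThatFileFormat(PARQUET)
        .withWriteColumns(writeColumns)
        .withReadColumns(ImmutableList.of(writeColumns.get(0)))
        .withSession(PARQUET_SESSION_USE_NAME)
        .isReadableByPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()));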