Use of io.trino.plugin.hive.orc.OrcPageSourceFactory in project trino by trinodb.
From the class TestHiveFileFormats, method testOrcOptimizedWriter.
@Test(dataProvider = "validRowAndFileSizePadding")
public void testOrcOptimizedWriter(int rowCount, long fileSizePadding)
        throws Exception
{
    HiveSessionProperties hiveSessionProperties = new HiveSessionProperties(
            new HiveConfig(),
            new OrcReaderConfig(),
            new OrcWriterConfig().setValidationPercentage(100.0),
            new ParquetReaderConfig(),
            new ParquetWriterConfig());
    ConnectorSession session = TestingConnectorSession.builder()
            .setPropertyMetadata(hiveSessionProperties.getSessionProperties())
            .build();

    // A Trino page cannot contain a map with null keys, so a page-based writer cannot write null keys
    List<TestColumn> testColumns = TEST_COLUMNS.stream()
            .filter(TestHiveFileFormats::withoutNullMapKeyTests)
            .collect(toList());

    assertThatFileFormat(ORC)
            .withColumns(testColumns)
            .withRowsCount(rowCount)
            .withSession(session)
            .withFileSizePadding(fileSizePadding)
            .withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), STATS, new OrcWriterOptions()))
            .isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT))
            .isReadableByPageSource(new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC));
}
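The filter above delegates to a private helper in TestHiveFileFormats. A minimal sketch of what such a predicate could look like, assuming the fixture's null-map-key columns follow the suite's t_-prefixed naming (the exact column names are an assumption, not confirmed from the source):

    // Hypothetical sketch: exclude the fixture columns whose maps contain null
    // keys, because a Trino page cannot represent a map with a null key and the
    // page-based ORC writer therefore cannot write them.
    private static boolean withoutNullMapKeyTests(TestColumn testColumn)
    {
        String name = testColumn.getName();
        return !name.equals("t_map_null_key")
                && !name.equals("t_map_null_key_complex_key_value")
                && !name.equals("t_map_null_key_complex_value");
    }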
Use of io.trino.plugin.hive.orc.OrcPageSourceFactory in project trino by trinodb.
From the class TestHiveFileFormats, method testFailForLongVarcharPartitionColumn.
@Test
public void testFailForLongVarcharPartitionColumn()
        throws Exception
{
    // The write value "test" has four characters and cannot fit the declared varchar(3) partition key
    TestColumn partitionColumn = new TestColumn("partition_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)), "test", utf8Slice("tes"), true);
    TestColumn varcharColumn = new TestColumn("varchar_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)), new HiveVarchar("tes", 3), utf8Slice("tes"));
    List<TestColumn> columns = ImmutableList.of(partitionColumn, varcharColumn);

    HiveErrorCode expectedErrorCode = HiveErrorCode.HIVE_INVALID_PARTITION_VALUE;
    String expectedMessage = "Invalid partition value 'test' for varchar(3) partition key: partition_column";

    assertThatFileFormat(RCTEXT)
            .withColumns(columns)
            .isFailingForPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()), expectedErrorCode, expectedMessage)
            .isFailingForRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(RCBINARY)
            .withColumns(columns)
            .isFailingForPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()), expectedErrorCode, expectedMessage)
            .isFailingForRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(ORC)
            .withColumns(columns)
            .isFailingForPageSource(new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC), expectedErrorCode, expectedMessage);
    assertThatFileFormat(PARQUET)
            .withColumns(columns)
            .withSession(PARQUET_SESSION)
            .isFailingForPageSource(new ParquetPageSourceFactory(HDFS_ENVIRONMENT, STATS, new ParquetReaderConfig(), new HiveConfig()), expectedErrorCode, expectedMessage);
    assertThatFileFormat(SEQUENCEFILE)
            .withColumns(columns)
            .isFailingForRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(TEXTFILE)
            .withColumns(columns)
            .isFailingForRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
}
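For context on the assertion, the expected failure comes from validating the partition value against the declared varchar length. A rough sketch of the kind of check involved, with simplified names (checkPartitionValue is hypothetical, not the actual Trino code path):

    // Illustrative only: 'test' is four characters, so it cannot be a value of
    // a varchar(3) partition key. TrinoException is io.trino.spi.TrinoException.
    static void checkPartitionValue(String value, int maxLength, String columnName)
    {
        if (value.length() > maxLength) {
            throw new TrinoException(
                    HiveErrorCode.HIVE_INVALID_PARTITION_VALUE,
                    String.format("Invalid partition value '%s' for varchar(%s) partition key: %s", value, maxLength, columnName));
        }
    }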
Use of io.trino.plugin.hive.orc.OrcPageSourceFactory in project trino by trinodb.
From the class TestHiveFileFormats, method testOrcUseColumnNameLowerCaseConversion.
@Test(dataProvider = "rowCount")
public void testOrcUseColumnNameLowerCaseConversion(int rowCount)
        throws Exception
{
    List<TestColumn> testColumnsUpperCase = TEST_COLUMNS.stream()
            .map(testColumn -> new TestColumn(
                    testColumn.getName().toUpperCase(Locale.ENGLISH),
                    testColumn.getObjectInspector(),
                    testColumn.getWriteValue(),
                    testColumn.getExpectedValue(),
                    testColumn.isPartitionKey()))
            .collect(toList());
    ConnectorSession session = getHiveSession(new HiveConfig(), new OrcReaderConfig().setUseColumnNames(true));

    assertThatFileFormat(ORC)
            .withWriteColumns(testColumnsUpperCase)
            .withRowsCount(rowCount)
            .withReadColumns(TEST_COLUMNS)
            .withSession(session)
            .isReadableByPageSource(new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC));
}
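What this exercises: the file is written with upper-cased column names but read back using the original lower-case TEST_COLUMNS. With setUseColumnNames(true), which appears to correspond to the hive.orc.use-column-names configuration property, the ORC reader matches columns by name, so that matching must be case-insensitive for the test to pass.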
Use of io.trino.plugin.hive.orc.OrcPageSourceFactory in project trino by trinodb.
From the class TestHiveFileFormats, method testOrcUseColumnNames.
@Test(dataProvider = "rowCount")
public void testOrcUseColumnNames(int rowCount)
        throws Exception
{
    ConnectorSession session = getHiveSession(new HiveConfig(), new OrcReaderConfig().setUseColumnNames(true));

    // Hive binary writers are broken for timestamps
    List<TestColumn> testColumns = TEST_COLUMNS.stream()
            .filter(TestHiveFileFormats::withoutTimestamps)
            .collect(toImmutableList());

    assertThatFileFormat(ORC)
            .withWriteColumns(testColumns)
            .withRowsCount(rowCount)
            .withReadColumns(Lists.reverse(testColumns))
            .withSession(session)
            .isReadableByPageSource(new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC));
}
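Two details worth noting. Reading with Lists.reverse(testColumns) means the read schema's column order differs from the write order, which only works because the reader resolves columns by name rather than by position. The timestamp filter is a private helper in TestHiveFileFormats; a plausible sketch, where the column names are assumptions mirroring the fixture's conventions:

    // Hypothetical sketch: drop the timestamp-bearing fixture columns, since
    // Hive's binary writers produce broken timestamp values.
    private static boolean withoutTimestamps(TestColumn testColumn)
    {
        String name = testColumn.getName();
        return !name.equals("t_timestamp")
                && !name.equals("t_map_timestamp")
                && !name.equals("t_array_timestamp");
    }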
Use of io.trino.plugin.hive.orc.OrcPageSourceFactory in project trino by trinodb.
From the class TestHiveFileFormats, method testORCProjectedColumns.
@Test(dataProvider = "rowCount")
public void testORCProjectedColumns(int rowCount)
        throws Exception
{
    List<TestColumn> supportedColumns = TEST_COLUMNS;
    List<TestColumn> regularColumns = getRegularColumns(supportedColumns);
    List<TestColumn> partitionColumns = getPartitionColumns(supportedColumns);

    // Create projected columns for all supported regular columns
    ImmutableList.Builder<TestColumn> writeColumnsBuilder = ImmutableList.builder();
    ImmutableList.Builder<TestColumn> readColumnsBuilder = ImmutableList.builder();
    generateProjectedColumns(regularColumns, writeColumnsBuilder, readColumnsBuilder);

    List<TestColumn> writeColumns = writeColumnsBuilder.addAll(partitionColumns).build();
    List<TestColumn> readColumns = readColumnsBuilder.addAll(partitionColumns).build();

    ConnectorSession session = getHiveSession(new HiveConfig(), new OrcReaderConfig().setUseColumnNames(true));

    // First with name-based column access enabled, then with the default session
    assertThatFileFormat(ORC)
            .withWriteColumns(writeColumns)
            .withReadColumns(readColumns)
            .withRowsCount(rowCount)
            .withSession(session)
            .isReadableByPageSource(new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC));
    assertThatFileFormat(ORC)
            .withWriteColumns(writeColumns)
            .withReadColumns(readColumns)
            .withRowsCount(rowCount)
            .isReadableByPageSource(new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC));
}
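Note the two assertions at the end: the first reads with a session that enables name-based column access (setUseColumnNames(true)), the second with no session override, so the projected (dereferenced) columns are verified under both name-based and position-based column resolution.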