Use of io.trino.orc.OrcWriterOptions in project trino by trinodb.
From the class TestHiveFileFormats, method testOrcOptimizedWriter.
@Test(dataProvider = "validRowAndFileSizePadding")
public void testOrcOptimizedWriter(int rowCount, long fileSizePadding) throws Exception {
    HiveSessionProperties hiveSessionProperties = new HiveSessionProperties(
            new HiveConfig(),
            new OrcReaderConfig(),
            new OrcWriterConfig().setValidationPercentage(100.0),
            new ParquetReaderConfig(),
            new ParquetWriterConfig());
    ConnectorSession session = TestingConnectorSession.builder()
            .setPropertyMetadata(hiveSessionProperties.getSessionProperties())
            .build();
    // A Trino page cannot contain a map with null keys, so a page-based writer cannot write null keys
    List<TestColumn> testColumns = TEST_COLUMNS.stream()
            .filter(TestHiveFileFormats::withoutNullMapKeyTests)
            .collect(toList());
    assertThatFileFormat(ORC)
            .withColumns(testColumns)
            .withRowsCount(rowCount)
            .withSession(session)
            .withFileSizePadding(fileSizePadding)
            .withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), STATS, new OrcWriterOptions()))
            .isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT))
            .isReadableByPageSource(new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC));
}
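For context, the withoutNullMapKeyTests filter referenced above is not included in this snippet. A hypothetical sketch of its shape follows; the real helper in TestHiveFileFormats may name the excluded columns differently:

// Hypothetical sketch of the filter used above; the actual column names in
// TestHiveFileFormats may differ.
private static boolean withoutNullMapKeyTests(TestColumn testColumn) {
    return !testColumn.getName().equals("test_map_null_key")
            && !testColumn.getName().equals("test_map_null_key_complex_key_value")
            && !testColumn.getName().equals("test_map_null_key_complex_value");
}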
Use of io.trino.orc.OrcWriterOptions in project trino by trinodb.
From the class TestOrcPredicates, method testOrcPredicates.
private void testOrcPredicates(ConnectorSession session) throws Exception {
    List<TestColumn> columnsToWrite = ImmutableList.of(columnPrimitiveInteger, columnStruct, columnPrimitiveBigInt);
    File file = File.createTempFile("test", "orc_predicate");
    file.delete();
    try {
        // Write data
        OrcFileWriterFactory writerFactory = new OrcFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), STATS, new OrcWriterOptions());
        FileSplit split = createTestFileTrino(file.getAbsolutePath(), ORC, HiveCompressionCodec.NONE, columnsToWrite, session, NUM_ROWS, writerFactory);

        TupleDomain<TestColumn> testingPredicate;

        // Verify predicates on base column
        List<TestColumn> columnsToRead = columnsToWrite;
        // All rows returned for a satisfying predicate
        testingPredicate = TupleDomain.withColumnDomains(ImmutableMap.of(
                columnPrimitiveBigInt, Domain.singleValue(BIGINT, 6L)));
        assertFilteredRows(testingPredicate, columnsToRead, session, split, NUM_ROWS);
        // No rows returned for a mismatched predicate
        testingPredicate = TupleDomain.withColumnDomains(ImmutableMap.of(
                columnPrimitiveBigInt, Domain.singleValue(BIGINT, 1L)));
        assertFilteredRows(testingPredicate, columnsToRead, session, split, 0);

        // Verify predicates on projected column
        TestColumn projectedColumn = new TestColumn(
                columnStruct.getBaseName(),
                columnStruct.getBaseObjectInspector(),
                ImmutableList.of("field1"),
                ImmutableList.of(1),
                javaLongObjectInspector,
                5L,
                5L,
                false);
        columnsToRead = ImmutableList.of(columnPrimitiveBigInt, projectedColumn);
        // All rows returned for a satisfying predicate
        testingPredicate = TupleDomain.withColumnDomains(ImmutableMap.of(
                projectedColumn, Domain.singleValue(BIGINT, 5L)));
        assertFilteredRows(testingPredicate, columnsToRead, session, split, NUM_ROWS);
        // No rows returned for a mismatched predicate
        testingPredicate = TupleDomain.withColumnDomains(ImmutableMap.of(
                projectedColumn, Domain.singleValue(BIGINT, 6L)));
        assertFilteredRows(testingPredicate, columnsToRead, session, split, 0);
    }
    finally {
        file.delete();
    }
}
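The predicates above are all single-value domains. Trino's SPI also supports range domains; a minimal sketch of a range predicate over the same bigint column, illustrative only and not part of the original test:

// Illustrative only: matches rows where the bigint column is greater than 0.
// Domain, Range, and ValueSet come from io.trino.spi.predicate;
// nullAllowed = false mirrors the semantics of Domain.singleValue.
TupleDomain<TestColumn> rangePredicate = TupleDomain.withColumnDomains(ImmutableMap.of(
        columnPrimitiveBigInt,
        Domain.create(ValueSet.ofRanges(Range.greaterThan(BIGINT, 0L)), false)));
// The written value (6) satisfies the range, so every row should be returned.
assertFilteredRows(rangePredicate, columnsToWrite, session, split, NUM_ROWS);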
Use of io.trino.orc.OrcWriterOptions in project trino by trinodb.
From the class TestOrcWriterOptions, method testOrcWriterOptionsFromOrcWriterConfig.
@Test
public void testOrcWriterOptionsFromOrcWriterConfig() {
    OrcWriterConfig orcWriterConfig = new OrcWriterConfig()
            .setWriterIdentification(LEGACY_HIVE_COMPATIBLE)
            .setStripeMinSize(DataSize.ofBytes(32))
            .setStripeMaxSize(DataSize.ofBytes(64))
            .setStripeMaxRowCount(100)
            .setRowGroupMaxRowCount(10)
            .setDictionaryMaxMemory(DataSize.ofBytes(16))
            .setStringStatisticsLimit(DataSize.ofBytes(16))
            .setMaxCompressionBufferSize(DataSize.ofBytes(256))
            .setDefaultBloomFilterFpp(0.5);
    OrcWriterOptions orcWriterOptions = orcWriterConfig.toOrcWriterOptions();
    // Every config setter should be reflected in the resulting writer options
    assertThat(orcWriterOptions.getWriterIdentification()).isEqualTo(LEGACY_HIVE_COMPATIBLE);
    assertThat(orcWriterOptions.getStripeMinSize()).isEqualTo(DataSize.ofBytes(32));
    assertThat(orcWriterOptions.getStripeMaxSize()).isEqualTo(DataSize.ofBytes(64));
    assertThat(orcWriterOptions.getStripeMaxRowCount()).isEqualTo(100);
    assertThat(orcWriterOptions.getRowGroupMaxRowCount()).isEqualTo(10);
    assertThat(orcWriterOptions.getDictionaryMaxMemory()).isEqualTo(DataSize.ofBytes(16));
    assertThat(orcWriterOptions.getMaxStringStatisticsLimit()).isEqualTo(DataSize.ofBytes(16));
    assertThat(orcWriterOptions.getMaxCompressionBufferSize()).isEqualTo(DataSize.ofBytes(256));
    assertThat(orcWriterOptions.getBloomFilterFpp()).isEqualTo(0.5);
    assertThat(orcWriterOptions.isBloomFilterColumn("unknown_column")).isFalse();
}
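OrcWriterOptions is immutable, so the same configuration can also be built directly. A sketch, assuming each getter exercised above has a corresponding with* setter on io.trino.orc.OrcWriterOptions (not a verbatim excerpt from Trino):

// Sketch: building the options directly; assumes the immutable with* setters
// implied by the getters in the test above.
OrcWriterOptions options = new OrcWriterOptions()
        .withWriterIdentification(LEGACY_HIVE_COMPATIBLE)
        .withStripeMinSize(DataSize.ofBytes(32))
        .withStripeMaxSize(DataSize.ofBytes(64))
        .withStripeMaxRowCount(100)
        .withRowGroupMaxRowCount(10)
        .withDictionaryMaxMemory(DataSize.ofBytes(16))
        .withMaxStringStatisticsLimit(DataSize.ofBytes(16))
        .withMaxCompressionBufferSize(DataSize.ofBytes(256))
        .withBloomFilterFpp(0.5);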
Use of io.trino.orc.OrcWriterOptions in project trino by trinodb.
From the class TestOrcWriterOptions, method testDefaultOrcWriterOptions.
@Test
public void testDefaultOrcWriterOptions() {
    OrcWriterOptions orcWriterOptions = new OrcWriterOptions();
    assertThat(orcWriterOptions.getWriterIdentification()).isEqualTo(TRINO);
    assertThat(orcWriterOptions.getStripeMinSize()).isEqualTo(DataSize.of(32, MEGABYTE));
    assertThat(orcWriterOptions.getStripeMaxSize()).isEqualTo(DataSize.of(64, MEGABYTE));
    assertThat(orcWriterOptions.getStripeMaxRowCount()).isEqualTo(10_000_000);
    assertThat(orcWriterOptions.getRowGroupMaxRowCount()).isEqualTo(10_000);
    assertThat(orcWriterOptions.getDictionaryMaxMemory()).isEqualTo(DataSize.of(16, MEGABYTE));
    assertThat(orcWriterOptions.getMaxStringStatisticsLimit()).isEqualTo(DataSize.ofBytes(64));
    assertThat(orcWriterOptions.getMaxCompressionBufferSize()).isEqualTo(DataSize.of(256, KILOBYTE));
    assertThat(orcWriterOptions.getBloomFilterFpp()).isEqualTo(0.05);
    assertThat(orcWriterOptions.isBloomFilterColumn("unknown_column")).isFalse();
}
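The last assertion holds because no bloom filter columns are configured by default. Opting a column in would look roughly like this, assuming a withBloomFilterColumns setter exists (hedged; check the Trino source for the exact name):

// Assumption: withBloomFilterColumns(Set<String>) is the opt-in setter.
OrcWriterOptions withBloom = new OrcWriterOptions()
        .withBloomFilterColumns(ImmutableSet.of("indexed_column"));
assertThat(withBloom.isBloomFilterColumn("indexed_column")).isTrue();
assertThat(withBloom.isBloomFilterColumn("unknown_column")).isFalse();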
Use of io.trino.orc.OrcWriterOptions in project trino by trinodb.
From the class TestOrcWriterOptions, method testOrcBloomFilterWithInvalidRange.
@Test(dataProvider = "invalidBloomFilterFpp")
public void testOrcBloomFilterWithInvalidRange(String fpp) {
    Properties tableProperties = new Properties();
    tableProperties.setProperty(ORC_BLOOM_FILTER_COLUMNS_KEY, "column_with_bloom_filter");
    tableProperties.setProperty(ORC_BLOOM_FILTER_FPP_KEY, fpp);
    // Parsing table properties with an out-of-range false-positive probability must fail
    assertThatThrownBy(() -> getOrcWriterOptions(tableProperties, new OrcWriterOptions()))
            .hasMessage("bloomFilterFpp should be > 0.0 & < 1.0");
}
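The invalidBloomFilterFpp data provider is not part of this snippet. A hypothetical version consistent with the error message (both bounds exclusive) could look like this; the actual values in the Trino source may differ:

// Hypothetical data provider; the real values in TestOrcWriterOptions may differ.
// The message "bloomFilterFpp should be > 0.0 & < 1.0" implies exclusive bounds.
@DataProvider(name = "invalidBloomFilterFpp")
public static Object[][] invalidBloomFilterFpp() {
    return new Object[][] {{"-0.5"}, {"0.0"}, {"1.0"}, {"42"}};
}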