Search in sources :

Example 1 with OrcPageSourceFactory

use of io.prestosql.plugin.hive.orc.OrcPageSourceFactory in project hetu-core by openlookeng.

the class TestHiveFileFormats method testTruncateVarcharColumn.

/**
 * Writes the value "test" into a varchar(4) column and reads it back through a
 * varchar(3) column, expecting the truncated value "tes" for each file format below.
 */
@Test
public void testTruncateVarcharColumn() throws Exception {
    TestColumn writeColumn = new TestColumn(
            "varchar_column",
            getPrimitiveJavaObjectInspector(new VarcharTypeInfo(4)),
            new HiveVarchar("test", 4),
            utf8Slice("test"));
    TestColumn readColumn = new TestColumn(
            "varchar_column",
            getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)),
            new HiveVarchar("tes", 3),
            utf8Slice("tes"));
    List<TestColumn> writtenColumns = ImmutableList.of(writeColumn);
    List<TestColumn> expectedColumns = ImmutableList.of(readColumn);

    assertThatFileFormat(RCTEXT)
            .withWriteColumns(writtenColumns)
            .withReadColumns(expectedColumns)
            .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()))
            .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));

    assertThatFileFormat(RCBINARY)
            .withWriteColumns(writtenColumns)
            .withReadColumns(expectedColumns)
            .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()))
            .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));

    // A single default HiveConfig supplies every ORC cache limit and TTL.
    HiveConfig cacheDefaults = new HiveConfig();
    OrcPageSourceFactory orcFactory = new OrcPageSourceFactory(
            TYPE_MANAGER,
            new HiveConfig().setUseOrcColumnNames(false),
            HDFS_ENVIRONMENT,
            STATS,
            OrcCacheStore.builder().newCacheStore(
                    cacheDefaults.getOrcFileTailCacheLimit(),
                    Duration.ofMillis(cacheDefaults.getOrcFileTailCacheTtl().toMillis()),
                    cacheDefaults.getOrcStripeFooterCacheLimit(),
                    Duration.ofMillis(cacheDefaults.getOrcStripeFooterCacheTtl().toMillis()),
                    cacheDefaults.getOrcRowIndexCacheLimit(),
                    Duration.ofMillis(cacheDefaults.getOrcRowIndexCacheTtl().toMillis()),
                    cacheDefaults.getOrcBloomFiltersCacheLimit(),
                    Duration.ofMillis(cacheDefaults.getOrcBloomFiltersCacheTtl().toMillis()),
                    cacheDefaults.getOrcRowDataCacheMaximumWeight(),
                    Duration.ofMillis(cacheDefaults.getOrcRowDataCacheTtl().toMillis()),
                    cacheDefaults.isOrcCacheStatsMetricCollectionEnabled()));
    assertThatFileFormat(ORC)
            .withWriteColumns(writtenColumns)
            .withReadColumns(expectedColumns)
            .isReadableByPageSource(orcFactory);

    assertThatFileFormat(PARQUET)
            .withWriteColumns(writtenColumns)
            .withReadColumns(expectedColumns)
            .withSession(parquetPageSourceSession)
            .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()));

    assertThatFileFormat(AVRO)
            .withWriteColumns(writtenColumns)
            .withReadColumns(expectedColumns)
            .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));

    assertThatFileFormat(SEQUENCEFILE)
            .withWriteColumns(writtenColumns)
            .withReadColumns(expectedColumns)
            .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));

    assertThatFileFormat(TEXTFILE)
            .withWriteColumns(writtenColumns)
            .withReadColumns(expectedColumns)
            .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
}
Also used : VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) HiveTestUtils.createGenericHiveRecordCursorProvider(io.prestosql.plugin.hive.HiveTestUtils.createGenericHiveRecordCursorProvider) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) OrcPageSourceFactory(io.prestosql.plugin.hive.orc.OrcPageSourceFactory) RcFilePageSourceFactory(io.prestosql.plugin.hive.rcfile.RcFilePageSourceFactory) ParquetPageSourceFactory(io.prestosql.plugin.hive.parquet.ParquetPageSourceFactory) Test(org.testng.annotations.Test)

Example 2 with OrcPageSourceFactory

use of io.prestosql.plugin.hive.orc.OrcPageSourceFactory in project hetu-core by openlookeng.

the class TestHiveFileFormats method testOrc.

/**
 * Checks that ORC files holding the non-timestamp test columns can be read through
 * an {@code OrcPageSourceFactory} configured with ORC column names disabled.
 *
 * @param rowCount number of rows to write, supplied by the "rowCount" data provider
 */
@Test(dataProvider = "rowCount")
public void testOrc(int rowCount) throws Exception {
    // Hive binary writers are broken for timestamps
    List<TestColumn> columnsWithoutTimestamps = TEST_COLUMNS.stream()
            .filter(TestHiveFileFormats::withoutTimestamps)
            .collect(toImmutableList());

    // A single default HiveConfig supplies every ORC cache limit and TTL.
    HiveConfig cacheDefaults = new HiveConfig();
    OrcPageSourceFactory orcFactory = new OrcPageSourceFactory(
            TYPE_MANAGER,
            new HiveConfig().setUseOrcColumnNames(false),
            HDFS_ENVIRONMENT,
            STATS,
            OrcCacheStore.builder().newCacheStore(
                    cacheDefaults.getOrcFileTailCacheLimit(),
                    Duration.ofMillis(cacheDefaults.getOrcFileTailCacheTtl().toMillis()),
                    cacheDefaults.getOrcStripeFooterCacheLimit(),
                    Duration.ofMillis(cacheDefaults.getOrcStripeFooterCacheTtl().toMillis()),
                    cacheDefaults.getOrcRowIndexCacheLimit(),
                    Duration.ofMillis(cacheDefaults.getOrcRowIndexCacheTtl().toMillis()),
                    cacheDefaults.getOrcBloomFiltersCacheLimit(),
                    Duration.ofMillis(cacheDefaults.getOrcBloomFiltersCacheTtl().toMillis()),
                    cacheDefaults.getOrcRowDataCacheMaximumWeight(),
                    Duration.ofMillis(cacheDefaults.getOrcRowDataCacheTtl().toMillis()),
                    cacheDefaults.isOrcCacheStatsMetricCollectionEnabled()));

    assertThatFileFormat(ORC)
            .withColumns(columnsWithoutTimestamps)
            .withRowsCount(rowCount)
            .isReadableByPageSource(orcFactory);
}
Also used : OrcPageSourceFactory(io.prestosql.plugin.hive.orc.OrcPageSourceFactory) Test(org.testng.annotations.Test)

Example 3 with OrcPageSourceFactory

use of io.prestosql.plugin.hive.orc.OrcPageSourceFactory in project hetu-core by openlookeng.

the class TestHiveFileFormats method testFailForLongVarcharPartitionColumn.

/**
 * Declares a varchar(3) partition column carrying the four-character value "test" and
 * expects each listed reader to fail with {@code HIVE_INVALID_PARTITION_VALUE} and the
 * matching error message.
 */
@Test
public void testFailForLongVarcharPartitionColumn() throws Exception {
    TestColumn partitionColumn = new TestColumn(
            "partition_column",
            getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)),
            "test",
            utf8Slice("tes"),
            true);
    TestColumn varcharColumn = new TestColumn(
            "varchar_column",
            getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)),
            new HiveVarchar("tes", 3),
            utf8Slice("tes"));
    List<TestColumn> columns = ImmutableList.of(partitionColumn, varcharColumn);

    HiveErrorCode expectedErrorCode = HiveErrorCode.HIVE_INVALID_PARTITION_VALUE;
    String expectedMessage = "Invalid partition value 'test' for varchar(3) partition key: partition_column";

    assertThatFileFormat(RCTEXT)
            .withColumns(columns)
            .isFailingForPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()), expectedErrorCode, expectedMessage)
            .isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);

    assertThatFileFormat(RCBINARY)
            .withColumns(columns)
            .isFailingForPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()), expectedErrorCode, expectedMessage)
            .isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);

    // A single default HiveConfig supplies every ORC cache limit and TTL.
    HiveConfig cacheDefaults = new HiveConfig();
    OrcPageSourceFactory orcFactory = new OrcPageSourceFactory(
            TYPE_MANAGER,
            new HiveConfig().setUseOrcColumnNames(false),
            HDFS_ENVIRONMENT,
            STATS,
            OrcCacheStore.builder().newCacheStore(
                    cacheDefaults.getOrcFileTailCacheLimit(),
                    Duration.ofMillis(cacheDefaults.getOrcFileTailCacheTtl().toMillis()),
                    cacheDefaults.getOrcStripeFooterCacheLimit(),
                    Duration.ofMillis(cacheDefaults.getOrcStripeFooterCacheTtl().toMillis()),
                    cacheDefaults.getOrcRowIndexCacheLimit(),
                    Duration.ofMillis(cacheDefaults.getOrcRowIndexCacheTtl().toMillis()),
                    cacheDefaults.getOrcBloomFiltersCacheLimit(),
                    Duration.ofMillis(cacheDefaults.getOrcBloomFiltersCacheTtl().toMillis()),
                    cacheDefaults.getOrcRowDataCacheMaximumWeight(),
                    Duration.ofMillis(cacheDefaults.getOrcRowDataCacheTtl().toMillis()),
                    cacheDefaults.isOrcCacheStatsMetricCollectionEnabled()));
    assertThatFileFormat(ORC)
            .withColumns(columns)
            .isFailingForPageSource(orcFactory, expectedErrorCode, expectedMessage);

    assertThatFileFormat(PARQUET)
            .withColumns(columns)
            .withSession(parquetPageSourceSession)
            .isFailingForPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()), expectedErrorCode, expectedMessage);

    assertThatFileFormat(SEQUENCEFILE)
            .withColumns(columns)
            .isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);

    assertThatFileFormat(TEXTFILE)
            .withColumns(columns)
            .isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
}
Also used : VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) HiveTestUtils.createGenericHiveRecordCursorProvider(io.prestosql.plugin.hive.HiveTestUtils.createGenericHiveRecordCursorProvider) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) OrcPageSourceFactory(io.prestosql.plugin.hive.orc.OrcPageSourceFactory) RcFilePageSourceFactory(io.prestosql.plugin.hive.rcfile.RcFilePageSourceFactory) ParquetPageSourceFactory(io.prestosql.plugin.hive.parquet.ParquetPageSourceFactory) Test(org.testng.annotations.Test)

Example 4 with OrcPageSourceFactory

use of io.prestosql.plugin.hive.orc.OrcPageSourceFactory in project hetu-core by openlookeng.

the class HiveTestUtils method getDefaultHiveDataStreamFactories.

/**
 * Builds the default set of page source factories (RCFile, ORC, Parquet) for tests.
 *
 * <p>Every factory, including the ORC cache store, is configured from the supplied
 * {@code hiveConfig}, so ORC cache limits and TTLs customised by the caller take effect
 * instead of being replaced by the defaults of a fresh {@code HiveConfig}.
 *
 * @param hiveConfig configuration applied to the HDFS environment, the factories and the ORC caches
 * @return an immutable set holding one factory per supported page-source format
 */
public static Set<HivePageSourceFactory> getDefaultHiveDataStreamFactories(HiveConfig hiveConfig) {
    FileFormatDataSourceStats stats = new FileFormatDataSourceStats();
    HdfsEnvironment testHdfsEnvironment = createTestHdfsEnvironment(hiveConfig);
    return ImmutableSet.<HivePageSourceFactory>builder()
            .add(new RcFilePageSourceFactory(TYPE_MANAGER, testHdfsEnvironment, stats, hiveConfig))
            .add(new OrcPageSourceFactory(
                    TYPE_MANAGER,
                    hiveConfig,
                    testHdfsEnvironment,
                    stats,
                    // Read cache settings from the caller's config rather than fresh defaults.
                    OrcCacheStore.builder().newCacheStore(
                            hiveConfig.getOrcFileTailCacheLimit(),
                            Duration.ofMillis(hiveConfig.getOrcFileTailCacheTtl().toMillis()),
                            hiveConfig.getOrcStripeFooterCacheLimit(),
                            Duration.ofMillis(hiveConfig.getOrcStripeFooterCacheTtl().toMillis()),
                            hiveConfig.getOrcRowIndexCacheLimit(),
                            Duration.ofMillis(hiveConfig.getOrcRowIndexCacheTtl().toMillis()),
                            hiveConfig.getOrcBloomFiltersCacheLimit(),
                            Duration.ofMillis(hiveConfig.getOrcBloomFiltersCacheTtl().toMillis()),
                            hiveConfig.getOrcRowDataCacheMaximumWeight(),
                            Duration.ofMillis(hiveConfig.getOrcRowDataCacheTtl().toMillis()),
                            hiveConfig.isOrcCacheStatsMetricCollectionEnabled())))
            .add(new ParquetPageSourceFactory(TYPE_MANAGER, testHdfsEnvironment, stats, hiveConfig))
            .build();
}
Also used : OrcPageSourceFactory(io.prestosql.plugin.hive.orc.OrcPageSourceFactory) RcFilePageSourceFactory(io.prestosql.plugin.hive.rcfile.RcFilePageSourceFactory) ParquetPageSourceFactory(io.prestosql.plugin.hive.parquet.ParquetPageSourceFactory)

Example 5 with OrcPageSourceFactory

use of io.prestosql.plugin.hive.orc.OrcPageSourceFactory in project hetu-core by openlookeng.

the class TestHiveFileFormats method testOrcUseColumnNames.

/**
 * Writes the non-timestamp test columns to ORC and reads them back in reversed order
 * through an {@code OrcPageSourceFactory} configured with ORC column names enabled.
 *
 * @param rowCount number of rows to write, supplied by the "rowCount" data provider
 */
@Test(dataProvider = "rowCount")
public void testOrcUseColumnNames(int rowCount) throws Exception {
    HiveSessionProperties sessionProperties = new HiveSessionProperties(
            new HiveConfig(),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig());
    TestingConnectorSession session = new TestingConnectorSession(sessionProperties.getSessionProperties());

    // Hive binary writers are broken for timestamps
    List<TestColumn> columnsWithoutTimestamps = TEST_COLUMNS.stream()
            .filter(TestHiveFileFormats::withoutTimestamps)
            .collect(toImmutableList());

    // A single default HiveConfig supplies every ORC cache limit and TTL.
    HiveConfig cacheDefaults = new HiveConfig();
    OrcPageSourceFactory orcFactory = new OrcPageSourceFactory(
            TYPE_MANAGER,
            new HiveConfig().setUseOrcColumnNames(true),
            HDFS_ENVIRONMENT,
            STATS,
            OrcCacheStore.builder().newCacheStore(
                    cacheDefaults.getOrcFileTailCacheLimit(),
                    Duration.ofMillis(cacheDefaults.getOrcFileTailCacheTtl().toMillis()),
                    cacheDefaults.getOrcStripeFooterCacheLimit(),
                    Duration.ofMillis(cacheDefaults.getOrcStripeFooterCacheTtl().toMillis()),
                    cacheDefaults.getOrcRowIndexCacheLimit(),
                    Duration.ofMillis(cacheDefaults.getOrcRowIndexCacheTtl().toMillis()),
                    cacheDefaults.getOrcBloomFiltersCacheLimit(),
                    Duration.ofMillis(cacheDefaults.getOrcBloomFiltersCacheTtl().toMillis()),
                    cacheDefaults.getOrcRowDataCacheMaximumWeight(),
                    Duration.ofMillis(cacheDefaults.getOrcRowDataCacheTtl().toMillis()),
                    cacheDefaults.isOrcCacheStatsMetricCollectionEnabled()));

    assertThatFileFormat(ORC)
            .withWriteColumns(columnsWithoutTimestamps)
            .withRowsCount(rowCount)
            .withReadColumns(Lists.reverse(columnsWithoutTimestamps))
            .withSession(session)
            .isReadableByPageSource(orcFactory);
}
Also used : TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) OrcPageSourceFactory(io.prestosql.plugin.hive.orc.OrcPageSourceFactory) Test(org.testng.annotations.Test)

Aggregations

OrcPageSourceFactory (io.prestosql.plugin.hive.orc.OrcPageSourceFactory): 6; Test (org.testng.annotations.Test): 5; HiveTestUtils.createGenericHiveRecordCursorProvider (io.prestosql.plugin.hive.HiveTestUtils.createGenericHiveRecordCursorProvider): 3; ParquetPageSourceFactory (io.prestosql.plugin.hive.parquet.ParquetPageSourceFactory): 3; RcFilePageSourceFactory (io.prestosql.plugin.hive.rcfile.RcFilePageSourceFactory): 3; TestingConnectorSession (io.prestosql.testing.TestingConnectorSession): 2; HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 2; VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo): 2; OrcWriterOptions (io.prestosql.orc.OrcWriterOptions): 1