Search in sources :

Example 1 with RcFilePageSourceFactory

use of com.facebook.presto.hive.rcfile.RcFilePageSourceFactory in project presto by prestodb.

the class HiveTestUtils method getDefaultHiveBatchPageSourceFactories.

public static Set<HiveBatchPageSourceFactory> getDefaultHiveBatchPageSourceFactories(HiveClientConfig hiveClientConfig, MetastoreClientConfig metastoreClientConfig) {
    FileFormatDataSourceStats stats = new FileFormatDataSourceStats();
    HdfsEnvironment testHdfsEnvironment = createTestHdfsEnvironment(hiveClientConfig, metastoreClientConfig);
    return ImmutableSet.<HiveBatchPageSourceFactory>builder().add(new RcFilePageSourceFactory(FUNCTION_AND_TYPE_MANAGER, testHdfsEnvironment, stats)).add(new OrcBatchPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, hiveClientConfig, testHdfsEnvironment, stats, new StorageOrcFileTailSource(), StripeMetadataSourceFactory.of(new StorageStripeMetadataSource()))).add(new DwrfBatchPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, hiveClientConfig, testHdfsEnvironment, stats, new StorageOrcFileTailSource(), StripeMetadataSourceFactory.of(new StorageStripeMetadataSource()), NO_ENCRYPTION)).add(new ParquetPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, testHdfsEnvironment, stats, new MetadataReader())).add(new PageFilePageSourceFactory(testHdfsEnvironment, new BlockEncodingManager())).build();
}
Also used : DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) BlockEncodingManager(com.facebook.presto.common.block.BlockEncodingManager) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) PageFilePageSourceFactory(com.facebook.presto.hive.pagefile.PageFilePageSourceFactory)

Example 2 with RcFilePageSourceFactory

use of com.facebook.presto.hive.rcfile.RcFilePageSourceFactory in project presto by prestodb.

the class TestHiveFileFormats method testRcBinaryPageSource.

@Test(dataProvider = "rowCount")
public void testRcBinaryPageSource(int rowCount) throws Exception {
    // RCBinary does not support complex type as key of a map and interprets empty VARCHAR as nulls
    List<TestColumn> testColumns = TEST_COLUMNS.stream().filter(testColumn -> !testColumn.getName().equals("t_empty_varchar")).collect(toList());
    assertThatFileFormat(RCBINARY).withColumns(testColumns).withRowsCount(rowCount).isReadableByPageSource(new RcFilePageSourceFactory(FUNCTION_AND_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS));
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Test(org.testng.annotations.Test) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) FileSplit(org.apache.hadoop.mapred.FileSplit) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LzoCodec(io.airlift.compress.lzo.LzoCodec) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructuralTestUtil.rowBlockOf(com.facebook.presto.tests.StructuralTestUtil.rowBlockOf) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConnectorSession(com.facebook.presto.spi.ConnectorSession) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) HIVE_CLIENT_CONFIG(com.facebook.presto.hive.HiveTestUtils.HIVE_CLIENT_CONFIG) Iterables.filter(com.google.common.collect.Iterables.filter) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) FUNCTION_RESOLUTION(com.facebook.presto.hive.HiveTestUtils.FUNCTION_RESOLUTION) ROW_EXPRESSION_SERVICE(com.facebook.presto.hive.HiveTestUtils.ROW_EXPRESSION_SERVICE) Lists(com.google.common.collect.Lists) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) CSV(com.facebook.presto.hive.HiveStorageFormat.CSV) ArrayType(com.facebook.presto.common.type.ArrayType) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) File(java.io.File) FUNCTION_AND_TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.FUNCTION_AND_TYPE_MANAGER) RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) HDFS_ENVIRONMENT(com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) ParquetFileWriterFactory(com.facebook.presto.hive.parquet.ParquetFileWriterFactory) RowType(com.facebook.presto.common.type.RowType) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) CacheConfig(com.facebook.presto.cache.CacheConfig) SchemaTableName(com.facebook.presto.spi.SchemaTableName) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) TimeZone(java.util.TimeZone) BeforeClass(org.testng.annotations.BeforeClass) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) StructuralTestUtil.arrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) Optional(java.util.Optional) ErrorCodeSupplier(com.facebook.presto.spi.ErrorCodeSupplier) DataProvider(org.testng.annotations.DataProvider) Column(com.facebook.presto.hive.metastore.Column) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Assert.assertEquals(org.testng.Assert.assertEquals) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) LzopCodec(io.airlift.compress.lzo.LzopCodec) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) NO_ENCRYPTION(com.facebook.presto.hive.HiveDwrfEncryptionProvider.NO_ENCRYPTION) StructuralTestUtil.mapBlockOf(com.facebook.presto.tests.StructuralTestUtil.mapBlockOf) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) Objects.requireNonNull(java.util.Objects.requireNonNull) DEFAULT_HIVE_FILE_CONTEXT(com.facebook.presto.hive.HiveFileContext.DEFAULT_HIVE_FILE_CONTEXT) SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) Storage(com.facebook.presto.hive.metastore.Storage) Assert.fail(org.testng.Assert.fail) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Test(org.testng.annotations.Test)

Example 3 with RcFilePageSourceFactory

use of com.facebook.presto.hive.rcfile.RcFilePageSourceFactory in project presto by prestodb.

the class TestHiveFileFormats method testRcTextOptimizedWriter.

@Test(dataProvider = "rowCount")
public void testRcTextOptimizedWriter(int rowCount) throws Exception {
    List<TestColumn> testColumns = TEST_COLUMNS.stream().filter(TestHiveFileFormats::withoutNullMapKeyTests).collect(toImmutableList());
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig().setRcfileOptimizedWriterEnabled(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
    assertThatFileFormat(RCTEXT).withColumns(testColumns).withRowsCount(rowCount).withSession(session).withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, FUNCTION_AND_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)).isReadableByPageSource(new RcFilePageSourceFactory(FUNCTION_AND_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS));
}
Also used : TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) CacheConfig(com.facebook.presto.cache.CacheConfig) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Test(org.testng.annotations.Test)

Example 4 with RcFilePageSourceFactory

use of com.facebook.presto.hive.rcfile.RcFilePageSourceFactory in project presto by prestodb.

the class TestHiveFileFormats method testRcBinaryOptimizedWriter.

@Test(dataProvider = "rowCount")
public void testRcBinaryOptimizedWriter(int rowCount) throws Exception {
    List<TestColumn> testColumns = TEST_COLUMNS.stream().filter(testColumn -> !testColumn.getName().equals("t_empty_varchar")).filter(TestHiveFileFormats::withoutNullMapKeyTests).collect(toList());
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig().setRcfileOptimizedWriterEnabled(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
    assertThatFileFormat(RCBINARY).withColumns(testColumns).withRowsCount(rowCount).withSession(session).withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, FUNCTION_AND_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)).isReadableByPageSource(new RcFilePageSourceFactory(FUNCTION_AND_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS));
}
Also used : TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) CacheConfig(com.facebook.presto.cache.CacheConfig) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Test(org.testng.annotations.Test)

Example 5 with RcFilePageSourceFactory

use of com.facebook.presto.hive.rcfile.RcFilePageSourceFactory in project presto by prestodb.

the class TestHiveFileFormats method testTruncateVarcharColumn.

@Test
public void testTruncateVarcharColumn() throws Exception {
    TestColumn writeColumn = new TestColumn("varchar_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(4)), new HiveVarchar("test", 4), utf8Slice("test"));
    TestColumn readColumn = new TestColumn("varchar_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)), new HiveVarchar("tes", 3), utf8Slice("tes"));
    assertThatFileFormat(RCTEXT).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByPageSource(new RcFilePageSourceFactory(FUNCTION_AND_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(RCBINARY).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByPageSource(new RcFilePageSourceFactory(FUNCTION_AND_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(ORC).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByPageSource(new OrcBatchPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, false, HDFS_ENVIRONMENT, STATS, 100, new StorageOrcFileTailSource(), StripeMetadataSourceFactory.of(new StorageStripeMetadataSource())));
    assertThatFileFormat(PARQUET).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).withSession(parquetPageSourceSession).isReadableByPageSource(new ParquetPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, HDFS_ENVIRONMENT, STATS, METADATA_READER));
    assertThatFileFormat(AVRO).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(SEQUENCEFILE).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(TEXTFILE).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
}
Also used : VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) Test(org.testng.annotations.Test)

Aggregations

RcFilePageSourceFactory (com.facebook.presto.hive.rcfile.RcFilePageSourceFactory)7 Test (org.testng.annotations.Test)6 OrcBatchPageSourceFactory (com.facebook.presto.hive.orc.OrcBatchPageSourceFactory)4 ParquetPageSourceFactory (com.facebook.presto.hive.parquet.ParquetPageSourceFactory)4 StorageStripeMetadataSource (com.facebook.presto.orc.StorageStripeMetadataSource)4 StorageOrcFileTailSource (com.facebook.presto.orc.cache.StorageOrcFileTailSource)4 TestingConnectorSession (com.facebook.presto.testing.TestingConnectorSession)4 CacheConfig (com.facebook.presto.cache.CacheConfig)3 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)3 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)3 DwrfBatchPageSourceFactory (com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory)2 MetadataReader (com.facebook.presto.parquet.cache.MetadataReader)2 BlockEncodingManager (com.facebook.presto.common.block.BlockEncodingManager)1 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)1 ArrayType (com.facebook.presto.common.type.ArrayType)1 BIGINT (com.facebook.presto.common.type.BigintType.BIGINT)1 BOOLEAN (com.facebook.presto.common.type.BooleanType.BOOLEAN)1 DOUBLE (com.facebook.presto.common.type.DoubleType.DOUBLE)1 INTEGER (com.facebook.presto.common.type.IntegerType.INTEGER)1 RowType (com.facebook.presto.common.type.RowType)1