Search in sources :

Example 6 with StorageStripeMetadataSource

use of com.facebook.presto.orc.StorageStripeMetadataSource in project presto by prestodb.

the class TestHiveFileFormats method testDwrfOptimizedWriter.

@Test(dataProvider = "rowCount")
public void testDwrfOptimizedWriter(int rowCount) throws Exception {
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig().setOrcOptimizedWriterEnabled(true).setOrcWriterValidationPercentage(100.0), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
    // DWRF does not support modern Hive types
    // A Presto page can not contain a map with null keys, so a page based writer can not write null keys
    List<TestColumn> testColumns = TEST_COLUMNS.stream().filter(testColumn -> !hasType(testColumn.getObjectInspector(), PrimitiveCategory.DATE, PrimitiveCategory.VARCHAR, PrimitiveCategory.CHAR, PrimitiveCategory.DECIMAL)).filter(testColumn -> !testColumn.getName().equals("t_map_null_key") && !testColumn.getName().equals("t_map_null_key_complex_value") && !testColumn.getName().equals("t_map_null_key_complex_key_value")).collect(toList());
    assertThatFileFormat(DWRF).withColumns(testColumns).withRowsCount(rowCount).withSession(session).withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, new OutputStreamDataSinkFactory(), FUNCTION_AND_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS, new OrcFileWriterConfig(), NO_ENCRYPTION)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)).isReadableByPageSource(new DwrfBatchPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, HIVE_CLIENT_CONFIG, HDFS_ENVIRONMENT, STATS, new StorageOrcFileTailSource(), StripeMetadataSourceFactory.of(new StorageStripeMetadataSource()), NO_ENCRYPTION));
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Test(org.testng.annotations.Test) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) FileSplit(org.apache.hadoop.mapred.FileSplit) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LzoCodec(io.airlift.compress.lzo.LzoCodec) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructuralTestUtil.rowBlockOf(com.facebook.presto.tests.StructuralTestUtil.rowBlockOf) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConnectorSession(com.facebook.presto.spi.ConnectorSession) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) HIVE_CLIENT_CONFIG(com.facebook.presto.hive.HiveTestUtils.HIVE_CLIENT_CONFIG) Iterables.filter(com.google.common.collect.Iterables.filter) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) FUNCTION_RESOLUTION(com.facebook.presto.hive.HiveTestUtils.FUNCTION_RESOLUTION) ROW_EXPRESSION_SERVICE(com.facebook.presto.hive.HiveTestUtils.ROW_EXPRESSION_SERVICE) Lists(com.google.common.collect.Lists) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) CSV(com.facebook.presto.hive.HiveStorageFormat.CSV) ArrayType(com.facebook.presto.common.type.ArrayType) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) File(java.io.File) FUNCTION_AND_TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.FUNCTION_AND_TYPE_MANAGER) RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) HDFS_ENVIRONMENT(com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) ParquetFileWriterFactory(com.facebook.presto.hive.parquet.ParquetFileWriterFactory) RowType(com.facebook.presto.common.type.RowType) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) CacheConfig(com.facebook.presto.cache.CacheConfig) SchemaTableName(com.facebook.presto.spi.SchemaTableName) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) TimeZone(java.util.TimeZone) BeforeClass(org.testng.annotations.BeforeClass) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) StructuralTestUtil.arrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) Optional(java.util.Optional) ErrorCodeSupplier(com.facebook.presto.spi.ErrorCodeSupplier) DataProvider(org.testng.annotations.DataProvider) Column(com.facebook.presto.hive.metastore.Column) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Assert.assertEquals(org.testng.Assert.assertEquals) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) LzopCodec(io.airlift.compress.lzo.LzopCodec) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) NO_ENCRYPTION(com.facebook.presto.hive.HiveDwrfEncryptionProvider.NO_ENCRYPTION) StructuralTestUtil.mapBlockOf(com.facebook.presto.tests.StructuralTestUtil.mapBlockOf) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) Objects.requireNonNull(java.util.Objects.requireNonNull) DEFAULT_HIVE_FILE_CONTEXT(com.facebook.presto.hive.HiveFileContext.DEFAULT_HIVE_FILE_CONTEXT) SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) Storage(com.facebook.presto.hive.metastore.Storage) Assert.fail(org.testng.Assert.fail) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) CacheConfig(com.facebook.presto.cache.CacheConfig) Test(org.testng.annotations.Test)

Example 7 with StorageStripeMetadataSource

use of com.facebook.presto.orc.StorageStripeMetadataSource in project presto by prestodb.

the class TestHiveFileFormats method testOrcOptimizedWriter.

@Test(dataProvider = "rowCount")
public void testOrcOptimizedWriter(int rowCount) throws Exception {
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig().setOrcOptimizedWriterEnabled(true).setOrcWriterValidationPercentage(100.0), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
    // A Presto page can not contain a map with null keys, so a page based writer can not write null keys
    List<TestColumn> testColumns = TEST_COLUMNS.stream().filter(testColumn -> !testColumn.getName().equals("t_map_null_key") && !testColumn.getName().equals("t_map_null_key_complex_value") && !testColumn.getName().equals("t_map_null_key_complex_key_value")).collect(toList());
    assertThatFileFormat(ORC).withColumns(testColumns).withRowsCount(rowCount).withSession(session).withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, new OutputStreamDataSinkFactory(), FUNCTION_AND_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS, new OrcFileWriterConfig(), NO_ENCRYPTION)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)).isReadableByPageSource(new OrcBatchPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, false, HDFS_ENVIRONMENT, STATS, 100, new StorageOrcFileTailSource(), StripeMetadataSourceFactory.of(new StorageStripeMetadataSource())));
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Test(org.testng.annotations.Test) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) FileSplit(org.apache.hadoop.mapred.FileSplit) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LzoCodec(io.airlift.compress.lzo.LzoCodec) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructuralTestUtil.rowBlockOf(com.facebook.presto.tests.StructuralTestUtil.rowBlockOf) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConnectorSession(com.facebook.presto.spi.ConnectorSession) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) HIVE_CLIENT_CONFIG(com.facebook.presto.hive.HiveTestUtils.HIVE_CLIENT_CONFIG) Iterables.filter(com.google.common.collect.Iterables.filter) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) FUNCTION_RESOLUTION(com.facebook.presto.hive.HiveTestUtils.FUNCTION_RESOLUTION) ROW_EXPRESSION_SERVICE(com.facebook.presto.hive.HiveTestUtils.ROW_EXPRESSION_SERVICE) Lists(com.google.common.collect.Lists) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) CSV(com.facebook.presto.hive.HiveStorageFormat.CSV) ArrayType(com.facebook.presto.common.type.ArrayType) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) File(java.io.File) FUNCTION_AND_TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.FUNCTION_AND_TYPE_MANAGER) RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) HDFS_ENVIRONMENT(com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) ParquetFileWriterFactory(com.facebook.presto.hive.parquet.ParquetFileWriterFactory) RowType(com.facebook.presto.common.type.RowType) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) CacheConfig(com.facebook.presto.cache.CacheConfig) SchemaTableName(com.facebook.presto.spi.SchemaTableName) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) TimeZone(java.util.TimeZone) BeforeClass(org.testng.annotations.BeforeClass) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) StructuralTestUtil.arrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) Optional(java.util.Optional) ErrorCodeSupplier(com.facebook.presto.spi.ErrorCodeSupplier) DataProvider(org.testng.annotations.DataProvider) Column(com.facebook.presto.hive.metastore.Column) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Assert.assertEquals(org.testng.Assert.assertEquals) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) LzopCodec(io.airlift.compress.lzo.LzopCodec) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) NO_ENCRYPTION(com.facebook.presto.hive.HiveDwrfEncryptionProvider.NO_ENCRYPTION) StructuralTestUtil.mapBlockOf(com.facebook.presto.tests.StructuralTestUtil.mapBlockOf) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) Objects.requireNonNull(java.util.Objects.requireNonNull) DEFAULT_HIVE_FILE_CONTEXT(com.facebook.presto.hive.HiveFileContext.DEFAULT_HIVE_FILE_CONTEXT) SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) Storage(com.facebook.presto.hive.metastore.Storage) Assert.fail(org.testng.Assert.fail) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) CacheConfig(com.facebook.presto.cache.CacheConfig) Test(org.testng.annotations.Test)

Example 8 with StorageStripeMetadataSource

use of com.facebook.presto.orc.StorageStripeMetadataSource in project presto by prestodb.

the class TestHiveFileFormats method testTruncateVarcharColumn.

@Test
public void testTruncateVarcharColumn() throws Exception {
    TestColumn writeColumn = new TestColumn("varchar_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(4)), new HiveVarchar("test", 4), utf8Slice("test"));
    TestColumn readColumn = new TestColumn("varchar_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)), new HiveVarchar("tes", 3), utf8Slice("tes"));
    assertThatFileFormat(RCTEXT).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByPageSource(new RcFilePageSourceFactory(FUNCTION_AND_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(RCBINARY).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByPageSource(new RcFilePageSourceFactory(FUNCTION_AND_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(ORC).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByPageSource(new OrcBatchPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, false, HDFS_ENVIRONMENT, STATS, 100, new StorageOrcFileTailSource(), StripeMetadataSourceFactory.of(new StorageStripeMetadataSource())));
    assertThatFileFormat(PARQUET).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).withSession(parquetPageSourceSession).isReadableByPageSource(new ParquetPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, HDFS_ENVIRONMENT, STATS, METADATA_READER));
    assertThatFileFormat(AVRO).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(SEQUENCEFILE).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(TEXTFILE).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
}
Also used : VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) Test(org.testng.annotations.Test)

Example 9 with StorageStripeMetadataSource

use of com.facebook.presto.orc.StorageStripeMetadataSource in project presto by prestodb.

the class TestHiveFileFormats method testOrcUseColumnNames.

@Test(dataProvider = "rowCount")
public void testOrcUseColumnNames(int rowCount) throws Exception {
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
    assertThatFileFormat(ORC).withWriteColumns(TEST_COLUMNS).withRowsCount(rowCount).withReadColumns(Lists.reverse(TEST_COLUMNS)).withSession(session).isReadableByPageSource(new OrcBatchPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, true, HDFS_ENVIRONMENT, STATS, 100, new StorageOrcFileTailSource(), StripeMetadataSourceFactory.of(new StorageStripeMetadataSource())));
}
Also used : OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) CacheConfig(com.facebook.presto.cache.CacheConfig) Test(org.testng.annotations.Test)

Example 10 with StorageStripeMetadataSource

use of com.facebook.presto.orc.StorageStripeMetadataSource in project presto by prestodb.

the class HiveClientModule method createStripeMetadataSourceFactory.

@Singleton
@Provides
public StripeMetadataSourceFactory createStripeMetadataSourceFactory(OrcCacheConfig orcCacheConfig, MBeanExporter exporter) {
    StripeMetadataSource stripeMetadataSource = new StorageStripeMetadataSource();
    if (orcCacheConfig.isStripeMetadataCacheEnabled()) {
        Cache<StripeId, Slice> footerCache = CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getStripeFooterCacheSize().toBytes()).weigher((id, footer) -> toIntExact(((Slice) footer).getRetainedSize())).expireAfterAccess(orcCacheConfig.getStripeFooterCacheTtlSinceLastAccess().toMillis(), MILLISECONDS).recordStats().build();
        Cache<StripeStreamId, Slice> streamCache = CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getStripeStreamCacheSize().toBytes()).weigher((id, stream) -> toIntExact(((Slice) stream).getRetainedSize())).expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS).recordStats().build();
        CacheStatsMBean footerCacheStatsMBean = new CacheStatsMBean(footerCache);
        CacheStatsMBean streamCacheStatsMBean = new CacheStatsMBean(streamCache);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeFooter"), footerCacheStatsMBean);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStream"), streamCacheStatsMBean);
        Optional<Cache<StripeStreamId, List<RowGroupIndex>>> rowGroupIndexCache = Optional.empty();
        if (orcCacheConfig.isRowGroupIndexCacheEnabled()) {
            rowGroupIndexCache = Optional.of(CacheBuilder.newBuilder().maximumWeight(orcCacheConfig.getRowGroupIndexCacheSize().toBytes()).weigher((id, rowGroupIndices) -> toIntExact(((List<RowGroupIndex>) rowGroupIndices).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum())).expireAfterAccess(orcCacheConfig.getStripeStreamCacheTtlSinceLastAccess().toMillis(), MILLISECONDS).recordStats().build());
            CacheStatsMBean rowGroupIndexCacheStatsMBean = new CacheStatsMBean(rowGroupIndexCache.get());
            exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_StripeStreamRowGroupIndex"), rowGroupIndexCacheStatsMBean);
        }
        stripeMetadataSource = new CachingStripeMetadataSource(stripeMetadataSource, footerCache, streamCache, rowGroupIndexCache);
    }
    StripeMetadataSourceFactory factory = StripeMetadataSourceFactory.of(stripeMetadataSource);
    if (orcCacheConfig.isDwrfStripeCacheEnabled()) {
        factory = new DwrfAwareStripeMetadataSourceFactory(factory);
    }
    return factory;
}
Also used : StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) Module(com.google.inject.Module) PrestoS3ClientFactory(com.facebook.presto.hive.s3.PrestoS3ClientFactory) StripeMetadataSource(com.facebook.presto.orc.StripeMetadataSource) StripeId(com.facebook.presto.orc.StripeReader.StripeId) OrcCacheConfig(com.facebook.presto.orc.cache.OrcCacheConfig) ConnectorNodePartitioningProvider(com.facebook.presto.spi.connector.ConnectorNodePartitioningProvider) DwrfSelectivePageSourceFactory(com.facebook.presto.hive.orc.DwrfSelectivePageSourceFactory) EncryptionLibrary(com.facebook.presto.orc.EncryptionLibrary) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) ExportBinder.newExporter(org.weakref.jmx.guice.ExportBinder.newExporter) CachingOrcFileTailSource(com.facebook.presto.orc.cache.CachingOrcFileTailSource) Multibinder.newSetBinder(com.google.inject.multibindings.Multibinder.newSetBinder) OrcSelectivePageSourceFactory(com.facebook.presto.hive.orc.OrcSelectivePageSourceFactory) TupleDomainFilterCache(com.facebook.presto.hive.orc.TupleDomainFilterCache) EventClient(com.facebook.airlift.event.client.EventClient) ParquetFileMetadata(com.facebook.presto.parquet.cache.ParquetFileMetadata) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) PageFileWriterFactory(com.facebook.presto.hive.pagefile.PageFileWriterFactory) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ConnectorSplitManager(com.facebook.presto.spi.connector.ConnectorSplitManager) ParquetMetadataSource(com.facebook.presto.parquet.cache.ParquetMetadataSource) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Multibinder(com.google.inject.multibindings.Multibinder) Threads.daemonThreadsNamed(com.facebook.airlift.concurrent.Threads.daemonThreadsNamed) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) List(java.util.List) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) Optional(java.util.Optional) ParquetCacheConfig(com.facebook.presto.parquet.cache.ParquetCacheConfig) CacheBuilder(com.google.common.cache.CacheBuilder) TypeLiteral(com.google.inject.TypeLiteral) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) MoreExecutors.listeningDecorator(com.google.common.util.concurrent.MoreExecutors.listeningDecorator) ParquetSelectivePageSourceFactory(com.facebook.presto.hive.parquet.ParquetSelectivePageSourceFactory) Slice(io.airlift.slice.Slice) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) HivePlanOptimizerProvider(com.facebook.presto.hive.rule.HivePlanOptimizerProvider) DataSinkFactory(com.facebook.presto.hive.datasink.DataSinkFactory) Singleton(javax.inject.Singleton) Supplier(java.util.function.Supplier) JsonCodecBinder.jsonCodecBinder(com.facebook.airlift.json.JsonCodecBinder.jsonCodecBinder) ConnectorPlanOptimizerProvider(com.facebook.presto.spi.connector.ConnectorPlanOptimizerProvider) BoundedExecutor(com.facebook.airlift.concurrent.BoundedExecutor) Binder(com.google.inject.Binder) PageFilePageSourceFactory(com.facebook.presto.hive.pagefile.PageFilePageSourceFactory) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) ParquetDataSourceId(com.facebook.presto.parquet.ParquetDataSourceId) ExecutorServiceAdapter(com.facebook.airlift.concurrent.ExecutorServiceAdapter) Math.toIntExact(java.lang.Math.toIntExact) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) ConnectorPageSinkProvider(com.facebook.presto.spi.connector.ConnectorPageSinkProvider) ExecutorService(java.util.concurrent.ExecutorService) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) ObjectNames.generatedNameOf(org.weakref.jmx.ObjectNames.generatedNameOf) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) ForUnknown(com.facebook.presto.hive.HiveDwrfEncryptionProvider.ForUnknown) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) UnsupportedEncryptionLibrary(com.facebook.presto.orc.UnsupportedEncryptionLibrary) Scopes(com.google.inject.Scopes) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) HiveCachingHdfsConfiguration(com.facebook.presto.hive.cache.HiveCachingHdfsConfiguration) HivePartitionMutator(com.facebook.presto.hive.metastore.HivePartitionMutator) CachingParquetMetadataSource(com.facebook.presto.parquet.cache.CachingParquetMetadataSource) ConnectorMetadataUpdaterProvider(com.facebook.presto.spi.connector.ConnectorMetadataUpdaterProvider) SmileCodecBinder.smileCodecBinder(com.facebook.airlift.json.smile.SmileCodecBinder.smileCodecBinder) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Provides(com.google.inject.Provides) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) ForCachingFileSystem(com.facebook.presto.cache.ForCachingFileSystem) ConfigBinder.configBinder(com.facebook.airlift.configuration.ConfigBinder.configBinder) ForCryptoService(com.facebook.presto.hive.HiveDwrfEncryptionProvider.ForCryptoService) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) Cache(com.google.common.cache.Cache) MBeanExporter(org.weakref.jmx.MBeanExporter) ParquetFileWriterFactory(com.facebook.presto.hive.parquet.ParquetFileWriterFactory) StripeStreamId(com.facebook.presto.orc.StripeReader.StripeStreamId) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) CachingStripeMetadataSource(com.facebook.presto.orc.CachingStripeMetadataSource) StripeId(com.facebook.presto.orc.StripeReader.StripeId) RowGroupIndex(com.facebook.presto.orc.metadata.RowGroupIndex) Slice(io.airlift.slice.Slice) DwrfAwareStripeMetadataSourceFactory(com.facebook.presto.orc.DwrfAwareStripeMetadataSourceFactory) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) TupleDomainFilterCache(com.facebook.presto.hive.orc.TupleDomainFilterCache) Cache(com.google.common.cache.Cache) Singleton(javax.inject.Singleton) Provides(com.google.inject.Provides)

Aggregations

StorageStripeMetadataSource (com.facebook.presto.orc.StorageStripeMetadataSource)14 StorageOrcFileTailSource (com.facebook.presto.orc.cache.StorageOrcFileTailSource)13 OrcBatchPageSourceFactory (com.facebook.presto.hive.orc.OrcBatchPageSourceFactory)8 ParquetPageSourceFactory (com.facebook.presto.hive.parquet.ParquetPageSourceFactory)7 RcFilePageSourceFactory (com.facebook.presto.hive.rcfile.RcFilePageSourceFactory)7 Test (org.testng.annotations.Test)7 StripeMetadataSourceFactory (com.facebook.presto.orc.StripeMetadataSourceFactory)6 List (java.util.List)6 Optional (java.util.Optional)6 CacheConfig (com.facebook.presto.cache.CacheConfig)5 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)5 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)5 DwrfBatchPageSourceFactory (com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory)4 MetadataReader (com.facebook.presto.parquet.cache.MetadataReader)4 RuntimeStats (com.facebook.presto.common.RuntimeStats)3 ArrayType (com.facebook.presto.common.type.ArrayType)3 OutputStreamDataSinkFactory (com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory)3 ParquetFileWriterFactory (com.facebook.presto.hive.parquet.ParquetFileWriterFactory)3 OrcReader (com.facebook.presto.orc.OrcReader)3 RaptorOrcAggregatedMemoryContext (com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext)3