Search in sources:

Example 1 with OutputStreamDataSinkFactory

Use of com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory in project presto by prestodb.

From the class TestHiveFileFormats, method testDwrfOptimizedWriter:

@Test(dataProvider = "rowCount")
public void testDwrfOptimizedWriter(int rowCount) throws Exception {
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(
            new HiveClientConfig()
                    .setOrcOptimizedWriterEnabled(true)
                    .setOrcWriterValidationPercentage(100.0),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig(),
            new CacheConfig()).getSessionProperties());
    // DWRF does not support modern Hive types
    // A Presto page can not contain a map with null keys, so a page based writer can not write null keys
    List<TestColumn> testColumns = TEST_COLUMNS.stream()
            .filter(testColumn -> !hasType(testColumn.getObjectInspector(), PrimitiveCategory.DATE, PrimitiveCategory.VARCHAR, PrimitiveCategory.CHAR, PrimitiveCategory.DECIMAL))
            .filter(testColumn -> !testColumn.getName().equals("t_map_null_key")
                    && !testColumn.getName().equals("t_map_null_key_complex_value")
                    && !testColumn.getName().equals("t_map_null_key_complex_key_value"))
            .collect(toList());
    assertThatFileFormat(DWRF)
            .withColumns(testColumns)
            .withRowsCount(rowCount)
            .withSession(session)
            .withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, new OutputStreamDataSinkFactory(), FUNCTION_AND_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS, new OrcFileWriterConfig(), NO_ENCRYPTION))
            .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT))
            .isReadableByPageSource(new DwrfBatchPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, HIVE_CLIENT_CONFIG, HDFS_ENVIRONMENT, STATS, new StorageOrcFileTailSource(), StripeMetadataSourceFactory.of(new StorageStripeMetadataSource()), NO_ENCRYPTION));
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Test(org.testng.annotations.Test) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) FileSplit(org.apache.hadoop.mapred.FileSplit) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LzoCodec(io.airlift.compress.lzo.LzoCodec) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructuralTestUtil.rowBlockOf(com.facebook.presto.tests.StructuralTestUtil.rowBlockOf) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConnectorSession(com.facebook.presto.spi.ConnectorSession) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) HIVE_CLIENT_CONFIG(com.facebook.presto.hive.HiveTestUtils.HIVE_CLIENT_CONFIG) Iterables.filter(com.google.common.collect.Iterables.filter) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) FUNCTION_RESOLUTION(com.facebook.presto.hive.HiveTestUtils.FUNCTION_RESOLUTION) ROW_EXPRESSION_SERVICE(com.facebook.presto.hive.HiveTestUtils.ROW_EXPRESSION_SERVICE) Lists(com.google.common.collect.Lists) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) CSV(com.facebook.presto.hive.HiveStorageFormat.CSV) ArrayType(com.facebook.presto.common.type.ArrayType) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) File(java.io.File) FUNCTION_AND_TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.FUNCTION_AND_TYPE_MANAGER) 
RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) HDFS_ENVIRONMENT(com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) ParquetFileWriterFactory(com.facebook.presto.hive.parquet.ParquetFileWriterFactory) RowType(com.facebook.presto.common.type.RowType) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) CacheConfig(com.facebook.presto.cache.CacheConfig) SchemaTableName(com.facebook.presto.spi.SchemaTableName) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) TimeZone(java.util.TimeZone) BeforeClass(org.testng.annotations.BeforeClass) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) StructuralTestUtil.arrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) Optional(java.util.Optional) ErrorCodeSupplier(com.facebook.presto.spi.ErrorCodeSupplier) DataProvider(org.testng.annotations.DataProvider) Column(com.facebook.presto.hive.metastore.Column) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Assert.assertEquals(org.testng.Assert.assertEquals) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) LzopCodec(io.airlift.compress.lzo.LzopCodec) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) NO_ENCRYPTION(com.facebook.presto.hive.HiveDwrfEncryptionProvider.NO_ENCRYPTION) StructuralTestUtil.mapBlockOf(com.facebook.presto.tests.StructuralTestUtil.mapBlockOf) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) Objects.requireNonNull(java.util.Objects.requireNonNull) DEFAULT_HIVE_FILE_CONTEXT(com.facebook.presto.hive.HiveFileContext.DEFAULT_HIVE_FILE_CONTEXT) SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) Storage(com.facebook.presto.hive.metastore.Storage) Assert.fail(org.testng.Assert.fail) 
OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) CacheConfig(com.facebook.presto.cache.CacheConfig) Test(org.testng.annotations.Test)
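Both optimized-writer tests on this page are driven by a TestNG data provider named "rowCount". The snippet below is a minimal sketch of how such a provider is declared; the actual row counts used in TestHiveFileFormats may differ.

@DataProvider(name = "rowCount")
public static Object[][] rowCountProvider()
{
    // Hypothetical values chosen for illustration; the real provider may use different counts.
    return new Object[][] {{0}, {1}, {1_000}};
}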

Example 2 with OutputStreamDataSinkFactory

Use of com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory in project presto by prestodb.

From the class TestHiveFileFormats, method testOrcOptimizedWriter:

@Test(dataProvider = "rowCount")
public void testOrcOptimizedWriter(int rowCount) throws Exception {
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(
            new HiveClientConfig()
                    .setOrcOptimizedWriterEnabled(true)
                    .setOrcWriterValidationPercentage(100.0),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig(),
            new CacheConfig()).getSessionProperties());
    // A Presto page can not contain a map with null keys, so a page based writer can not write null keys
    List<TestColumn> testColumns = TEST_COLUMNS.stream()
            .filter(testColumn -> !testColumn.getName().equals("t_map_null_key")
                    && !testColumn.getName().equals("t_map_null_key_complex_value")
                    && !testColumn.getName().equals("t_map_null_key_complex_key_value"))
            .collect(toList());
    assertThatFileFormat(ORC)
            .withColumns(testColumns)
            .withRowsCount(rowCount)
            .withSession(session)
            .withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, new OutputStreamDataSinkFactory(), FUNCTION_AND_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS, new OrcFileWriterConfig(), NO_ENCRYPTION))
            .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT))
            .isReadableByPageSource(new OrcBatchPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, false, HDFS_ENVIRONMENT, STATS, 100, new StorageOrcFileTailSource(), StripeMetadataSourceFactory.of(new StorageStripeMetadataSource())));
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Test(org.testng.annotations.Test) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) FileSplit(org.apache.hadoop.mapred.FileSplit) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LzoCodec(io.airlift.compress.lzo.LzoCodec) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructuralTestUtil.rowBlockOf(com.facebook.presto.tests.StructuralTestUtil.rowBlockOf) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConnectorSession(com.facebook.presto.spi.ConnectorSession) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) HIVE_CLIENT_CONFIG(com.facebook.presto.hive.HiveTestUtils.HIVE_CLIENT_CONFIG) Iterables.filter(com.google.common.collect.Iterables.filter) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) FUNCTION_RESOLUTION(com.facebook.presto.hive.HiveTestUtils.FUNCTION_RESOLUTION) ROW_EXPRESSION_SERVICE(com.facebook.presto.hive.HiveTestUtils.ROW_EXPRESSION_SERVICE) Lists(com.google.common.collect.Lists) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) CSV(com.facebook.presto.hive.HiveStorageFormat.CSV) ArrayType(com.facebook.presto.common.type.ArrayType) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) File(java.io.File) FUNCTION_AND_TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.FUNCTION_AND_TYPE_MANAGER) 
RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) HDFS_ENVIRONMENT(com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) ParquetFileWriterFactory(com.facebook.presto.hive.parquet.ParquetFileWriterFactory) RowType(com.facebook.presto.common.type.RowType) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) CacheConfig(com.facebook.presto.cache.CacheConfig) SchemaTableName(com.facebook.presto.spi.SchemaTableName) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) TimeZone(java.util.TimeZone) BeforeClass(org.testng.annotations.BeforeClass) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) StructuralTestUtil.arrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) Optional(java.util.Optional) ErrorCodeSupplier(com.facebook.presto.spi.ErrorCodeSupplier) DataProvider(org.testng.annotations.DataProvider) Column(com.facebook.presto.hive.metastore.Column) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Assert.assertEquals(org.testng.Assert.assertEquals) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) LzopCodec(io.airlift.compress.lzo.LzopCodec) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) NO_ENCRYPTION(com.facebook.presto.hive.HiveDwrfEncryptionProvider.NO_ENCRYPTION) StructuralTestUtil.mapBlockOf(com.facebook.presto.tests.StructuralTestUtil.mapBlockOf) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) Objects.requireNonNull(java.util.Objects.requireNonNull) DEFAULT_HIVE_FILE_CONTEXT(com.facebook.presto.hive.HiveFileContext.DEFAULT_HIVE_FILE_CONTEXT) SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) Storage(com.facebook.presto.hive.metastore.Storage) Assert.fail(org.testng.Assert.fail) 
OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) CacheConfig(com.facebook.presto.cache.CacheConfig) Test(org.testng.annotations.Test)
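Examples 1 and 2 build the optimized-writer session identically. A small helper like the sketch below (a hypothetical refactoring, not part of the original test) would remove that duplication; it uses only constructors already shown above.

private static TestingConnectorSession createOptimizedWriterSession()
{
    // Same construction as in the two tests above: enable the optimized ORC writer and
    // validate 100% of the written files.
    return new TestingConnectorSession(new HiveSessionProperties(
            new HiveClientConfig()
                    .setOrcOptimizedWriterEnabled(true)
                    .setOrcWriterValidationPercentage(100.0),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig(),
            new CacheConfig()).getSessionProperties());
}

Both tests could then call createOptimizedWriterSession() instead of repeating the construction inline.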

Example 3 with OutputStreamDataSinkFactory

Use of com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory in project presto by prestodb.

From the class TestHiveMetadataFileFormatEncryptionSettings, method setup:

@BeforeClass
public void setup() {
    baseDirectory = new File(Files.createTempDir(), "metastore");
    metastore = new BridgingHiveMetastore(new InMemoryHiveMetastore(baseDirectory), new HivePartitionMutator());
    executor = newCachedThreadPool(daemonThreadsNamed("hive-encryption-test-%s"));
    transactionManager = new HiveTransactionManager();
    metadataFactory = new HiveMetadataFactory(
            metastore,
            HDFS_ENVIRONMENT,
            new HivePartitionManager(FUNCTION_AND_TYPE_MANAGER, HIVE_CLIENT_CONFIG),
            DateTimeZone.forTimeZone(TimeZone.getTimeZone(ZoneId.of(HIVE_CLIENT_CONFIG.getTimeZone()))),
            true, false, false, false, true, true,
            HIVE_CLIENT_CONFIG.getMaxPartitionBatchSize(),
            HIVE_CLIENT_CONFIG.getMaxPartitionsPerScan(),
            false,
            FUNCTION_AND_TYPE_MANAGER,
            new HiveLocationService(HDFS_ENVIRONMENT),
            FUNCTION_RESOLUTION,
            ROW_EXPRESSION_SERVICE,
            FILTER_STATS_CALCULATOR_SERVICE,
            new TableParameterCodec(),
            PARTITION_UPDATE_CODEC,
            PARTITION_UPDATE_SMILE_CODEC,
            listeningDecorator(executor),
            new HiveTypeTranslator(),
            new HiveStagingFileCommitter(HDFS_ENVIRONMENT, listeningDecorator(executor)),
            new HiveZeroRowFileCreator(HDFS_ENVIRONMENT, new OutputStreamDataSinkFactory(), listeningDecorator(executor)),
            TEST_SERVER_VERSION,
            new HivePartitionObjectBuilder(),
            new HiveEncryptionInformationProvider(ImmutableList.of(new TestDwrfEncryptionInformationSource())),
            new HivePartitionStats(),
            new HiveFileRenamer(),
            HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER);
    metastore.createDatabase(METASTORE_CONTEXT, Database.builder().setDatabaseName(TEST_DB_NAME).setOwnerName("public").setOwnerType(PrincipalType.ROLE).build());
}
Also used : OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) HivePartitionMutator(com.facebook.presto.hive.metastore.HivePartitionMutator) InMemoryHiveMetastore(com.facebook.presto.hive.metastore.thrift.InMemoryHiveMetastore) File(java.io.File) BridgingHiveMetastore(com.facebook.presto.hive.metastore.thrift.BridgingHiveMetastore) BeforeClass(org.testng.annotations.BeforeClass)
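The setup above allocates a cached thread pool and a temporary metastore directory. A matching cleanup method is not shown on this page; the sketch below is a plausible counterpart, not taken from the original test, and it assumes Guava's MoreFiles/RecursiveDeleteOption are on the classpath.

@AfterClass(alwaysRun = true)
public void tearDown()
        throws IOException
{
    // Stop the "hive-encryption-test-%s" executor started in setup().
    executor.shutdownNow();
    // Remove the temporary metastore directory created in setup(); deleteRecursively is Guava's
    // com.google.common.io.MoreFiles.
    MoreFiles.deleteRecursively(baseDirectory.toPath(), RecursiveDeleteOption.ALLOW_INSECURE);
}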

Example 4 with OutputStreamDataSinkFactory

Use of com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory in project presto by prestodb.

From the class TestHiveSplitManager, method assertRedundantColumnDomains:

private void assertRedundantColumnDomains(Range predicateRange, PartitionStatistics partitionStatistics, List<Set<ColumnHandle>> expectedRedundantColumnDomains, HiveColumnHandle columnHandle) throws Exception {
    // Prepare query predicate tuple domain
    TupleDomain<ColumnHandle> queryTupleDomain = TupleDomain.fromColumnDomains(Optional.of(ImmutableList.of(new ColumnDomain<>(columnHandle, Domain.create(SortedRangeSet.copyOf(predicateRange.getType(), ImmutableList.of(predicateRange)), false)))));
    // Prepare partition with stats
    PartitionWithStatistics partitionWithStatistics = new PartitionWithStatistics(new Partition("test_db", "test_table", ImmutableList.of(PARTITION_VALUE), new Storage(fromHiveStorageFormat(ORC), "location", Optional.empty(), true, ImmutableMap.of(), ImmutableMap.of()), COLUMNS, ImmutableMap.of(), Optional.empty(), false, true, 0), PARTITION_NAME, partitionStatistics);
    HiveClientConfig hiveClientConfig = new HiveClientConfig().setPartitionStatisticsBasedOptimizationEnabled(true);
    HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hiveClientConfig, new MetastoreClientConfig()), ImmutableSet.of()), new MetastoreClientConfig(), new NoHdfsAuthentication());
    HiveMetadataFactory metadataFactory = new HiveMetadataFactory(
            new TestingExtendedHiveMetastore(TEST_TABLE, partitionWithStatistics),
            hdfsEnvironment,
            new HivePartitionManager(FUNCTION_AND_TYPE_MANAGER, hiveClientConfig),
            DateTimeZone.forOffsetHours(1),
            true, false, false, false, true, true,
            hiveClientConfig.getMaxPartitionBatchSize(),
            hiveClientConfig.getMaxPartitionsPerScan(),
            false,
            FUNCTION_AND_TYPE_MANAGER,
            new HiveLocationService(hdfsEnvironment),
            FUNCTION_RESOLUTION,
            ROW_EXPRESSION_SERVICE,
            FILTER_STATS_CALCULATOR_SERVICE,
            new TableParameterCodec(),
            HiveTestUtils.PARTITION_UPDATE_CODEC,
            HiveTestUtils.PARTITION_UPDATE_SMILE_CODEC,
            executor,
            new HiveTypeTranslator(),
            new HiveStagingFileCommitter(hdfsEnvironment, executor),
            new HiveZeroRowFileCreator(hdfsEnvironment, new OutputStreamDataSinkFactory(), executor),
            TEST_SERVER_VERSION,
            new HivePartitionObjectBuilder(),
            new HiveEncryptionInformationProvider(ImmutableList.of()),
            new HivePartitionStats(),
            new HiveFileRenamer(),
            HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER);
    HiveSplitManager splitManager = new HiveSplitManager(new TestingHiveTransactionManager(metadataFactory), new NamenodeStats(), hdfsEnvironment, new TestingDirectoryLister(), directExecutor(), new HiveCoercionPolicy(FUNCTION_AND_TYPE_MANAGER), new CounterStat(), 100, hiveClientConfig.getMaxOutstandingSplitsSize(), hiveClientConfig.getMinPartitionBatchSize(), hiveClientConfig.getMaxPartitionBatchSize(), hiveClientConfig.getSplitLoaderConcurrency(), false, new ConfigBasedCacheQuotaRequirementProvider(new CacheConfig()), new HiveEncryptionInformationProvider(ImmutableList.of()));
    HiveColumnHandle partitionColumn = new HiveColumnHandle("ds", HIVE_STRING, parseTypeSignature(VARCHAR), MAX_PARTITION_KEY_COLUMN_INDEX, PARTITION_KEY, Optional.empty(), Optional.empty());
    List<HivePartition> partitions = ImmutableList.of(new HivePartition(new SchemaTableName("test_schema", "test_table"), PARTITION_NAME, ImmutableMap.of(partitionColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice(PARTITION_VALUE)))));
    TupleDomain<Subfield> domainPredicate = queryTupleDomain.transform(HiveColumnHandle.class::cast).transform(column -> new Subfield(column.getName(), ImmutableList.of()));
    ConnectorSplitSource splitSource = splitManager.getSplits(new HiveTransactionHandle(), new TestingConnectorSession(new HiveSessionProperties(hiveClientConfig, new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties()), new HiveTableLayoutHandle(new SchemaTableName("test_schema", "test_table"), "test_path", ImmutableList.of(partitionColumn), COLUMNS, ImmutableMap.of(), partitions, domainPredicate, TRUE_CONSTANT, ImmutableMap.of(partitionColumn.getName(), partitionColumn, columnHandle.getName(), columnHandle), queryTupleDomain, Optional.empty(), Optional.empty(), false, "layout", Optional.empty(), false), SPLIT_SCHEDULING_CONTEXT);
    List<Set<ColumnHandle>> actualRedundantColumnDomains = splitSource.getNextBatch(NOT_PARTITIONED, 100).get().getSplits().stream().map(HiveSplit.class::cast).map(HiveSplit::getRedundantColumnDomains).collect(toImmutableList());
    assertEquals(actualRedundantColumnDomains, expectedRedundantColumnDomains);
}
Also used : CounterStat(com.facebook.airlift.stats.CounterStat) Subfield(com.facebook.presto.common.Subfield) ColumnHandle(com.facebook.presto.spi.ColumnHandle) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) Storage(com.facebook.presto.hive.metastore.Storage) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) Set(java.util.Set) SortedRangeSet(com.facebook.presto.common.predicate.SortedRangeSet) ImmutableSet(com.google.common.collect.ImmutableSet) NoHdfsAuthentication(com.facebook.presto.hive.authentication.NoHdfsAuthentication) CacheConfig(com.facebook.presto.cache.CacheConfig) Partition(com.facebook.presto.hive.metastore.Partition) ConnectorSplitSource(com.facebook.presto.spi.ConnectorSplitSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) PartitionWithStatistics(com.facebook.presto.hive.metastore.PartitionWithStatistics)
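For context, the predicate handed to this helper is a single-column TupleDomain. The sketch below shows how a caller inside the same test class might assemble one; BIGINT and Range.greaterThan are chosen purely for illustration, and the column handle is assumed to already exist.

// Hypothetical caller-side predicate construction; it mirrors the assembly at the top of the helper above.
private static TupleDomain<ColumnHandle> greaterThanPredicate(HiveColumnHandle columnHandle, long bound)
{
    // A single open-ended range over a BIGINT column.
    Range range = Range.greaterThan(BIGINT, bound);
    Domain domain = Domain.create(SortedRangeSet.copyOf(BIGINT, ImmutableList.of(range)), false);
    return TupleDomain.fromColumnDomains(Optional.of(ImmutableList.of(new ColumnDomain<>(columnHandle, domain))));
}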

Example 5 with OutputStreamDataSinkFactory

Use of com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory in project presto by prestodb.

From the class AbstractTestHiveFileSystem, method setup:

protected void setup(String host, int port, String databaseName, BiFunction<HiveClientConfig, MetastoreClientConfig, HdfsConfiguration> hdfsConfigurationProvider, boolean s3SelectPushdownEnabled) {
    database = databaseName;
    table = new SchemaTableName(database, "presto_test_external_fs");
    String random = UUID.randomUUID().toString().toLowerCase(ENGLISH).replace("-", "");
    temporaryCreateTable = new SchemaTableName(database, "tmp_presto_test_create_" + random);
    config = new HiveClientConfig().setS3SelectPushdownEnabled(s3SelectPushdownEnabled);
    cacheConfig = new CacheConfig();
    metastoreClientConfig = new MetastoreClientConfig();
    String proxy = System.getProperty("hive.metastore.thrift.client.socks-proxy");
    if (proxy != null) {
        metastoreClientConfig.setMetastoreSocksProxy(HostAndPort.fromString(proxy));
    }
    HiveCluster hiveCluster = new TestingHiveCluster(metastoreClientConfig, host, port);
    ExecutorService executor = newCachedThreadPool(daemonThreadsNamed("hive-%s"));
    HivePartitionManager hivePartitionManager = new HivePartitionManager(FUNCTION_AND_TYPE_MANAGER, config);
    HdfsConfiguration hdfsConfiguration = hdfsConfigurationProvider.apply(config, metastoreClientConfig);
    hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, metastoreClientConfig, new NoHdfsAuthentication());
    ColumnConverterProvider columnConverterProvider = HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER;
    metastoreClient = new TestingHiveMetastore(new BridgingHiveMetastore(new ThriftHiveMetastore(hiveCluster, metastoreClientConfig), new HivePartitionMutator()), executor, metastoreClientConfig, getBasePath(), hdfsEnvironment);
    locationService = new HiveLocationService(hdfsEnvironment);
    metadataFactory = new HiveMetadataFactory(
            config,
            metastoreClientConfig,
            metastoreClient,
            hdfsEnvironment,
            hivePartitionManager,
            newDirectExecutorService(),
            FUNCTION_AND_TYPE_MANAGER,
            locationService,
            FUNCTION_RESOLUTION,
            ROW_EXPRESSION_SERVICE,
            FILTER_STATS_CALCULATOR_SERVICE,
            new TableParameterCodec(),
            HiveTestUtils.PARTITION_UPDATE_CODEC,
            HiveTestUtils.PARTITION_UPDATE_SMILE_CODEC,
            new HiveTypeTranslator(),
            new HiveStagingFileCommitter(hdfsEnvironment, listeningDecorator(executor)),
            new HiveZeroRowFileCreator(hdfsEnvironment, new OutputStreamDataSinkFactory(), listeningDecorator(executor)),
            new NodeVersion("test_version"),
            new HivePartitionObjectBuilder(),
            new HiveEncryptionInformationProvider(ImmutableList.of()),
            new HivePartitionStats(),
            new HiveFileRenamer(),
            columnConverterProvider);
    transactionManager = new HiveTransactionManager();
    splitManager = new HiveSplitManager(transactionManager, new NamenodeStats(), hdfsEnvironment, new CachingDirectoryLister(new HadoopDirectoryLister(), new HiveClientConfig()), new BoundedExecutor(executor, config.getMaxSplitIteratorThreads()), new HiveCoercionPolicy(FUNCTION_AND_TYPE_MANAGER), new CounterStat(), config.getMaxOutstandingSplits(), config.getMaxOutstandingSplitsSize(), config.getMinPartitionBatchSize(), config.getMaxPartitionBatchSize(), config.getSplitLoaderConcurrency(), config.getRecursiveDirWalkerEnabled(), new ConfigBasedCacheQuotaRequirementProvider(cacheConfig), new HiveEncryptionInformationProvider(ImmutableSet.of()));
    pageSinkProvider = new HivePageSinkProvider(getDefaultHiveFileWriterFactories(config, metastoreClientConfig), hdfsEnvironment, PAGE_SORTER, metastoreClient, new GroupByHashPageIndexerFactory(new JoinCompiler(MetadataManager.createTestMetadataManager(), new FeaturesConfig())), FUNCTION_AND_TYPE_MANAGER, config, metastoreClientConfig, locationService, HiveTestUtils.PARTITION_UPDATE_CODEC, HiveTestUtils.PARTITION_UPDATE_SMILE_CODEC, new TestingNodeManager("fake-environment"), new HiveEventClient(), new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()), new HiveWriterStats(), getDefaultOrcFileWriterFactory(config, metastoreClientConfig), columnConverterProvider);
    pageSourceProvider = new HivePageSourceProvider(config, hdfsEnvironment, getDefaultHiveRecordCursorProvider(config, metastoreClientConfig), getDefaultHiveBatchPageSourceFactories(config, metastoreClientConfig), getDefaultHiveSelectivePageSourceFactories(config, metastoreClientConfig), FUNCTION_AND_TYPE_MANAGER, ROW_EXPRESSION_SERVICE);
}
Also used : CounterStat(com.facebook.airlift.stats.CounterStat) HivePartitionMutator(com.facebook.presto.hive.metastore.HivePartitionMutator) TestingNodeManager(com.facebook.presto.testing.TestingNodeManager) BridgingHiveMetastore(com.facebook.presto.hive.metastore.thrift.BridgingHiveMetastore) JoinCompiler(com.facebook.presto.sql.gen.JoinCompiler) GroupByHashPageIndexerFactory(com.facebook.presto.GroupByHashPageIndexerFactory) TestingHiveCluster(com.facebook.presto.hive.metastore.thrift.TestingHiveCluster) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) FeaturesConfig(com.facebook.presto.sql.analyzer.FeaturesConfig) TestingHiveCluster(com.facebook.presto.hive.metastore.thrift.TestingHiveCluster) HiveCluster(com.facebook.presto.hive.metastore.thrift.HiveCluster) NoHdfsAuthentication(com.facebook.presto.hive.authentication.NoHdfsAuthentication) CacheConfig(com.facebook.presto.cache.CacheConfig) ThriftHiveMetastore(com.facebook.presto.hive.metastore.thrift.ThriftHiveMetastore) SchemaTableName(com.facebook.presto.spi.SchemaTableName) BoundedExecutor(com.facebook.airlift.concurrent.BoundedExecutor) MoreExecutors.newDirectExecutorService(com.google.common.util.concurrent.MoreExecutors.newDirectExecutorService) ExecutorService(java.util.concurrent.ExecutorService)
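Because setup takes the HDFS configuration as a BiFunction, concrete subclasses decide how the file system is wired. The sketch below is a hypothetical subclass (class name, host, port, and database are placeholders, and real subclasses in the repository may wire things differently); the configuration provider reuses the HiveHdfsConfiguration and HdfsConfigurationInitializer constructors shown in Example 4.

public class TestHiveFileSystemExample
        extends AbstractTestHiveFileSystem
{
    @BeforeClass
    public void initialize()
    {
        setup(
                // Metastore host, Thrift port, and database are placeholders.
                "localhost",
                9083,
                "default",
                // Build the HdfsConfiguration the same way Example 4 does.
                (hiveConfig, metastoreConfig) -> new HiveHdfsConfiguration(
                        new HdfsConfigurationInitializer(hiveConfig, metastoreConfig),
                        ImmutableSet.of()),
                // s3SelectPushdownEnabled
                false);
    }
}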

Aggregations

OutputStreamDataSinkFactory (com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory): 6
CacheConfig (com.facebook.presto.cache.CacheConfig): 5
CounterStat (com.facebook.airlift.stats.CounterStat): 3
NoHdfsAuthentication (com.facebook.presto.hive.authentication.NoHdfsAuthentication): 3
Storage (com.facebook.presto.hive.metastore.Storage): 3
File (java.io.File): 3
BeforeClass (org.testng.annotations.BeforeClass): 3
GroupByHashPageIndexerFactory (com.facebook.presto.GroupByHashPageIndexerFactory): 2
TupleDomain (com.facebook.presto.common.predicate.TupleDomain): 2
ArrayType (com.facebook.presto.common.type.ArrayType): 2
BIGINT (com.facebook.presto.common.type.BigintType.BIGINT): 2
BOOLEAN (com.facebook.presto.common.type.BooleanType.BOOLEAN): 2
DOUBLE (com.facebook.presto.common.type.DoubleType.DOUBLE): 2
INTEGER (com.facebook.presto.common.type.IntegerType.INTEGER): 2
RowType (com.facebook.presto.common.type.RowType): 2
VarcharType.createUnboundedVarcharType (com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType): 2
TRUE_CONSTANT (com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT): 2
NO_ENCRYPTION (com.facebook.presto.hive.HiveDwrfEncryptionProvider.NO_ENCRYPTION): 2
HIVE_INVALID_PARTITION_VALUE (com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE): 2
HIVE_PARTITION_SCHEMA_MISMATCH (com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH): 2