Search in sources :

Example 1 with FILE_INPUT_FORMAT

use of org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT in project presto by prestodb.

the class TestHiveFileFormats method testCursorProvider.

private void testCursorProvider(HiveRecordCursorProvider cursorProvider, FileSplit split, HiveStorageFormat storageFormat, List<TestColumn> testColumns, int rowCount) throws IOException {
    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, storageFormat.getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, storageFormat.getSerDe());
    splitProperties.setProperty("columns", Joiner.on(',').join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getName)));
    splitProperties.setProperty("columns.types", Joiner.on(',').join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getType)));
    List<HivePartitionKey> partitionKeys = testColumns.stream().filter(TestColumn::isPartitionKey).map(input -> new HivePartitionKey(input.getName(), HiveType.valueOf(input.getObjectInspector().getTypeName()), (String) input.getWriteValue())).collect(toList());
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(ImmutableSet.of(cursorProvider), ImmutableSet.of(), "test", new Configuration(), SESSION, split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), splitProperties, TupleDomain.all(), getColumnHandles(testColumns), partitionKeys, DateTimeZone.getDefault(), TYPE_MANAGER, ImmutableMap.of());
    RecordCursor cursor = ((RecordPageSource) pageSource.get()).getCursor();
    checkCursor(cursor, testColumns, rowCount);
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) Iterables.transform(com.google.common.collect.Iterables.transform) OrcPageSourceFactory(com.facebook.presto.hive.orc.OrcPageSourceFactory) Test(org.testng.annotations.Test) RowType(com.facebook.presto.type.RowType) FileSplit(org.apache.hadoop.mapred.FileSplit) Predicates.not(com.google.common.base.Predicates.not) Configuration(org.apache.hadoop.conf.Configuration) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) Path(org.apache.hadoop.fs.Path) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ImmutableCollectors.toImmutableList(com.facebook.presto.util.ImmutableCollectors.toImmutableList) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) BeforeClass(org.testng.annotations.BeforeClass) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) StructuralTestUtil.rowBlockOf(com.facebook.presto.tests.StructuralTestUtil.rowBlockOf) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Collectors(java.util.stream.Collectors) ConnectorSession(com.facebook.presto.spi.ConnectorSession) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) StructuralTestUtil.arrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.TYPE_MANAGER) Optional(java.util.Optional) INTEGER(com.facebook.presto.spi.type.IntegerType.INTEGER) Iterables.filter(com.google.common.collect.Iterables.filter) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) Joiner(com.google.common.base.Joiner) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) DataProvider(org.testng.annotations.DataProvider) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) ArrayType(com.facebook.presto.type.ArrayType) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Assert.assertEquals(org.testng.Assert.assertEquals) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) DwrfPageSourceFactory(com.facebook.presto.hive.orc.DwrfPageSourceFactory) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) Objects.requireNonNull(java.util.Objects.requireNonNull) ParquetRecordCursorProvider(com.facebook.presto.hive.parquet.ParquetRecordCursorProvider) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) Properties(java.util.Properties) Assert.fail(org.testng.Assert.fail) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) File(java.io.File) RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) VarcharType.createUnboundedVarcharType(com.facebook.presto.spi.type.VarcharType.createUnboundedVarcharType) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) HDFS_ENVIRONMENT(com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) RecordCursor(com.facebook.presto.spi.RecordCursor) Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) RecordPageSource(com.facebook.presto.spi.RecordPageSource)

Example 2 with FILE_INPUT_FORMAT

use of org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT in project presto by prestodb.

the class TestHiveFileFormats method testPageSourceFactory.

private void testPageSourceFactory(HivePageSourceFactory sourceFactory, FileSplit split, HiveStorageFormat storageFormat, List<TestColumn> testColumns, ConnectorSession session, int rowCount) throws IOException {
    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, storageFormat.getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, storageFormat.getSerDe());
    splitProperties.setProperty("columns", Joiner.on(',').join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getName)));
    splitProperties.setProperty("columns.types", Joiner.on(',').join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getType)));
    List<HivePartitionKey> partitionKeys = testColumns.stream().filter(TestColumn::isPartitionKey).map(input -> new HivePartitionKey(input.getName(), HiveType.valueOf(input.getObjectInspector().getTypeName()), (String) input.getWriteValue())).collect(toList());
    List<HiveColumnHandle> columnHandles = getColumnHandles(testColumns);
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(ImmutableSet.of(), ImmutableSet.of(sourceFactory), "test", new Configuration(), session, split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), splitProperties, TupleDomain.all(), columnHandles, partitionKeys, DateTimeZone.getDefault(), TYPE_MANAGER, ImmutableMap.of());
    assertTrue(pageSource.isPresent());
    checkPageSource(pageSource.get(), testColumns, getTypes(columnHandles), rowCount);
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) Iterables.transform(com.google.common.collect.Iterables.transform) OrcPageSourceFactory(com.facebook.presto.hive.orc.OrcPageSourceFactory) Test(org.testng.annotations.Test) RowType(com.facebook.presto.type.RowType) FileSplit(org.apache.hadoop.mapred.FileSplit) Predicates.not(com.google.common.base.Predicates.not) Configuration(org.apache.hadoop.conf.Configuration) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) Path(org.apache.hadoop.fs.Path) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ImmutableCollectors.toImmutableList(com.facebook.presto.util.ImmutableCollectors.toImmutableList) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) BeforeClass(org.testng.annotations.BeforeClass) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) StructuralTestUtil.rowBlockOf(com.facebook.presto.tests.StructuralTestUtil.rowBlockOf) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Collectors(java.util.stream.Collectors) ConnectorSession(com.facebook.presto.spi.ConnectorSession) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) StructuralTestUtil.arrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.TYPE_MANAGER) Optional(java.util.Optional) INTEGER(com.facebook.presto.spi.type.IntegerType.INTEGER) Iterables.filter(com.google.common.collect.Iterables.filter) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) Joiner(com.google.common.base.Joiner) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) DataProvider(org.testng.annotations.DataProvider) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) ArrayType(com.facebook.presto.type.ArrayType) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Assert.assertEquals(org.testng.Assert.assertEquals) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) DwrfPageSourceFactory(com.facebook.presto.hive.orc.DwrfPageSourceFactory) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) Objects.requireNonNull(java.util.Objects.requireNonNull) ParquetRecordCursorProvider(com.facebook.presto.hive.parquet.ParquetRecordCursorProvider) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) Properties(java.util.Properties) Assert.fail(org.testng.Assert.fail) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) File(java.io.File) RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) VarcharType.createUnboundedVarcharType(com.facebook.presto.spi.type.VarcharType.createUnboundedVarcharType) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) HDFS_ENVIRONMENT(com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource)

Aggregations

AVRO (com.facebook.presto.hive.HiveStorageFormat.AVRO)2 DWRF (com.facebook.presto.hive.HiveStorageFormat.DWRF)2 JSON (com.facebook.presto.hive.HiveStorageFormat.JSON)2 ORC (com.facebook.presto.hive.HiveStorageFormat.ORC)2 PARQUET (com.facebook.presto.hive.HiveStorageFormat.PARQUET)2 RCBINARY (com.facebook.presto.hive.HiveStorageFormat.RCBINARY)2 RCTEXT (com.facebook.presto.hive.HiveStorageFormat.RCTEXT)2 SEQUENCEFILE (com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE)2 TEXTFILE (com.facebook.presto.hive.HiveStorageFormat.TEXTFILE)2 HDFS_ENVIRONMENT (com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT)2 SESSION (com.facebook.presto.hive.HiveTestUtils.SESSION)2 TYPE_MANAGER (com.facebook.presto.hive.HiveTestUtils.TYPE_MANAGER)2 HiveTestUtils.getTypes (com.facebook.presto.hive.HiveTestUtils.getTypes)2 DwrfPageSourceFactory (com.facebook.presto.hive.orc.DwrfPageSourceFactory)2 OrcPageSourceFactory (com.facebook.presto.hive.orc.OrcPageSourceFactory)2 ParquetPageSourceFactory (com.facebook.presto.hive.parquet.ParquetPageSourceFactory)2 ParquetRecordCursorProvider (com.facebook.presto.hive.parquet.ParquetRecordCursorProvider)2 RcFilePageSourceFactory (com.facebook.presto.hive.rcfile.RcFilePageSourceFactory)2 ConnectorPageSource (com.facebook.presto.spi.ConnectorPageSource)2 ConnectorSession (com.facebook.presto.spi.ConnectorSession)2