Search in sources :

Example 1 with ParquetPageSourceFactory

use of com.facebook.presto.hive.parquet.ParquetPageSourceFactory in project presto by prestodb.

the class TestHiveFileFormats method testTruncateVarcharColumn.

@Test
public void testTruncateVarcharColumn() throws Exception {
    TestColumn writeColumn = new TestColumn("varchar_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(4)), new HiveVarchar("test", 4), utf8Slice("test"));
    TestColumn readColumn = new TestColumn("varchar_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)), new HiveVarchar("tes", 3), utf8Slice("tes"));
    assertThatFileFormat(RCTEXT).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByRecordCursor(new ColumnarTextHiveRecordCursorProvider(HDFS_ENVIRONMENT)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(RCBINARY).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByRecordCursor(new ColumnarBinaryHiveRecordCursorProvider(HDFS_ENVIRONMENT)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(ORC).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT));
    assertThatFileFormat(PARQUET).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).withSession(parquetCursorSession).isReadableByRecordCursor(new ParquetRecordCursorProvider(false, HDFS_ENVIRONMENT));
    assertThatFileFormat(PARQUET).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).withSession(parquetCursorPushdownSession).isReadableByRecordCursor(new ParquetRecordCursorProvider(false, HDFS_ENVIRONMENT));
    assertThatFileFormat(PARQUET).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).withSession(parquetPageSourceSession).isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT));
    assertThatFileFormat(PARQUET).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).withSession(parquetPageSourcePushdown).isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT));
    assertThatFileFormat(AVRO).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(SEQUENCEFILE).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(TEXTFILE).withWriteColumns(ImmutableList.of(writeColumn)).withReadColumns(ImmutableList.of(readColumn)).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
}
Also used : ParquetRecordCursorProvider(com.facebook.presto.hive.parquet.ParquetRecordCursorProvider) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) OrcPageSourceFactory(com.facebook.presto.hive.orc.OrcPageSourceFactory) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) Test(org.testng.annotations.Test)

Example 2 with ParquetPageSourceFactory

use of com.facebook.presto.hive.parquet.ParquetPageSourceFactory in project presto by prestodb.

the class TestHiveFileFormats method testFailForLongVarcharPartitionColumn.

@Test
public void testFailForLongVarcharPartitionColumn() throws Exception {
    TestColumn partitionColumn = new TestColumn("partition_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)), "test", utf8Slice("tes"), true);
    TestColumn varcharColumn = new TestColumn("varchar_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)), new HiveVarchar("tes", 3), utf8Slice("tes"));
    List<TestColumn> columns = ImmutableList.of(partitionColumn, varcharColumn);
    HiveErrorCode expectedErrorCode = HiveErrorCode.HIVE_INVALID_PARTITION_VALUE;
    String expectedMessage = "Invalid partition value 'test' for varchar(3) partition key: partition_column";
    assertThatFileFormat(RCTEXT).withColumns(columns).isFailingForRecordCursor(new ColumnarTextHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage).isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(RCBINARY).withColumns(columns).isFailingForRecordCursor(new ColumnarBinaryHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage).isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(ORC).withColumns(columns).isFailingForPageSource(new OrcPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(PARQUET).withColumns(columns).withSession(parquetCursorSession).isFailingForRecordCursor(new ParquetRecordCursorProvider(false, HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(PARQUET).withColumns(columns).withSession(parquetCursorPushdownSession).isFailingForRecordCursor(new ParquetRecordCursorProvider(false, HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(PARQUET).withColumns(columns).withSession(parquetPageSourceSession).isFailingForPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(PARQUET).withColumns(columns).withSession(parquetPageSourcePushdown).isFailingForPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(SEQUENCEFILE).withColumns(columns).isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(TEXTFILE).withColumns(columns).isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
}
Also used : ParquetRecordCursorProvider(com.facebook.presto.hive.parquet.ParquetRecordCursorProvider) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) OrcPageSourceFactory(com.facebook.presto.hive.orc.OrcPageSourceFactory) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) Test(org.testng.annotations.Test)

Example 3 with ParquetPageSourceFactory

use of com.facebook.presto.hive.parquet.ParquetPageSourceFactory in project presto by prestodb.

the class TestHiveFileFormats method testParquetPageSourceSchemaEvolution.

@Test(dataProvider = "rowCount")
public void testParquetPageSourceSchemaEvolution(int rowCount) throws Exception {
    List<TestColumn> writeColumns = getTestColumnsSupportedByParquet();
    // test index-based access
    boolean useParquetColumnNames = false;
    List<TestColumn> readColumns = writeColumns.stream().map(column -> new TestColumn(column.getName() + "_new", column.getObjectInspector(), column.getWriteValue(), column.getExpectedValue(), column.isPartitionKey())).collect(toList());
    assertThatFileFormat(PARQUET).withWriteColumns(writeColumns).withReadColumns(readColumns).withSession(parquetPageSourceSession).withRowsCount(rowCount).isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, useParquetColumnNames, HDFS_ENVIRONMENT));
    // test name-based access
    useParquetColumnNames = true;
    readColumns = Lists.reverse(writeColumns);
    assertThatFileFormat(PARQUET).withWriteColumns(writeColumns).withReadColumns(readColumns).withSession(parquetPageSourceSession).isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, useParquetColumnNames, HDFS_ENVIRONMENT));
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) Iterables.transform(com.google.common.collect.Iterables.transform) OrcPageSourceFactory(com.facebook.presto.hive.orc.OrcPageSourceFactory) Test(org.testng.annotations.Test) RowType(com.facebook.presto.type.RowType) FileSplit(org.apache.hadoop.mapred.FileSplit) Predicates.not(com.google.common.base.Predicates.not) Configuration(org.apache.hadoop.conf.Configuration) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) Path(org.apache.hadoop.fs.Path) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ImmutableCollectors.toImmutableList(com.facebook.presto.util.ImmutableCollectors.toImmutableList) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) BeforeClass(org.testng.annotations.BeforeClass) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) StructuralTestUtil.rowBlockOf(com.facebook.presto.tests.StructuralTestUtil.rowBlockOf) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Collectors(java.util.stream.Collectors) ConnectorSession(com.facebook.presto.spi.ConnectorSession) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) StructuralTestUtil.arrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.TYPE_MANAGER) Optional(java.util.Optional) INTEGER(com.facebook.presto.spi.type.IntegerType.INTEGER) Iterables.filter(com.google.common.collect.Iterables.filter) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) Joiner(com.google.common.base.Joiner) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) DataProvider(org.testng.annotations.DataProvider) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) ArrayType(com.facebook.presto.type.ArrayType) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Assert.assertEquals(org.testng.Assert.assertEquals) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) DwrfPageSourceFactory(com.facebook.presto.hive.orc.DwrfPageSourceFactory) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) Objects.requireNonNull(java.util.Objects.requireNonNull) ParquetRecordCursorProvider(com.facebook.presto.hive.parquet.ParquetRecordCursorProvider) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) Properties(java.util.Properties) Assert.fail(org.testng.Assert.fail) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) File(java.io.File) RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) VarcharType.createUnboundedVarcharType(com.facebook.presto.spi.type.VarcharType.createUnboundedVarcharType) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) HDFS_ENVIRONMENT(com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) Test(org.testng.annotations.Test)

Example 4 with ParquetPageSourceFactory

use of com.facebook.presto.hive.parquet.ParquetPageSourceFactory in project presto by prestodb.

the class TestHiveFileFormats method testParquetPageSource.

@Test(dataProvider = "rowCount")
public void testParquetPageSource(int rowCount) throws Exception {
    List<TestColumn> testColumns = getTestColumnsSupportedByParquet();
    assertThatFileFormat(PARQUET).withColumns(testColumns).withSession(parquetPageSourceSession).withRowsCount(rowCount).isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT));
    assertThatFileFormat(PARQUET).withColumns(testColumns).withSession(parquetPageSourcePushdown).withRowsCount(rowCount).isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT));
}
Also used : ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) Test(org.testng.annotations.Test)

Aggregations

ParquetPageSourceFactory (com.facebook.presto.hive.parquet.ParquetPageSourceFactory)4 Test (org.testng.annotations.Test)4 OrcPageSourceFactory (com.facebook.presto.hive.orc.OrcPageSourceFactory)3 ParquetRecordCursorProvider (com.facebook.presto.hive.parquet.ParquetRecordCursorProvider)3 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)3 VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo)3 AVRO (com.facebook.presto.hive.HiveStorageFormat.AVRO)1 DWRF (com.facebook.presto.hive.HiveStorageFormat.DWRF)1 JSON (com.facebook.presto.hive.HiveStorageFormat.JSON)1 ORC (com.facebook.presto.hive.HiveStorageFormat.ORC)1 PARQUET (com.facebook.presto.hive.HiveStorageFormat.PARQUET)1 RCBINARY (com.facebook.presto.hive.HiveStorageFormat.RCBINARY)1 RCTEXT (com.facebook.presto.hive.HiveStorageFormat.RCTEXT)1 SEQUENCEFILE (com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE)1 TEXTFILE (com.facebook.presto.hive.HiveStorageFormat.TEXTFILE)1 HDFS_ENVIRONMENT (com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT)1 SESSION (com.facebook.presto.hive.HiveTestUtils.SESSION)1 TYPE_MANAGER (com.facebook.presto.hive.HiveTestUtils.TYPE_MANAGER)1 HiveTestUtils.getTypes (com.facebook.presto.hive.HiveTestUtils.getTypes)1 DwrfPageSourceFactory (com.facebook.presto.hive.orc.DwrfPageSourceFactory)1