Use of com.facebook.presto.hive.parquet.ParquetPageSourceFactory in project presto by prestodb.
From the class TestHiveFileFormats, method testTruncateVarcharColumn.
@Test
public void testTruncateVarcharColumn()
        throws Exception
{
    // Write "test" as varchar(4) and read it back as varchar(3); every format
    // is expected to truncate the value to "tes".
    TestColumn writeColumn = new TestColumn("varchar_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(4)), new HiveVarchar("test", 4), utf8Slice("test"));
    TestColumn readColumn = new TestColumn("varchar_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)), new HiveVarchar("tes", 3), utf8Slice("tes"));
    assertThatFileFormat(RCTEXT)
            .withWriteColumns(ImmutableList.of(writeColumn))
            .withReadColumns(ImmutableList.of(readColumn))
            .isReadableByRecordCursor(new ColumnarTextHiveRecordCursorProvider(HDFS_ENVIRONMENT))
            .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(RCBINARY)
            .withWriteColumns(ImmutableList.of(writeColumn))
            .withReadColumns(ImmutableList.of(readColumn))
            .isReadableByRecordCursor(new ColumnarBinaryHiveRecordCursorProvider(HDFS_ENVIRONMENT))
            .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(ORC)
            .withWriteColumns(ImmutableList.of(writeColumn))
            .withReadColumns(ImmutableList.of(readColumn))
            .isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT));
    assertThatFileFormat(PARQUET)
            .withWriteColumns(ImmutableList.of(writeColumn))
            .withReadColumns(ImmutableList.of(readColumn))
            .withSession(parquetCursorSession)
            .isReadableByRecordCursor(new ParquetRecordCursorProvider(false, HDFS_ENVIRONMENT));
    assertThatFileFormat(PARQUET)
            .withWriteColumns(ImmutableList.of(writeColumn))
            .withReadColumns(ImmutableList.of(readColumn))
            .withSession(parquetCursorPushdownSession)
            .isReadableByRecordCursor(new ParquetRecordCursorProvider(false, HDFS_ENVIRONMENT));
    assertThatFileFormat(PARQUET)
            .withWriteColumns(ImmutableList.of(writeColumn))
            .withReadColumns(ImmutableList.of(readColumn))
            .withSession(parquetPageSourceSession)
            .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT));
    assertThatFileFormat(PARQUET)
            .withWriteColumns(ImmutableList.of(writeColumn))
            .withReadColumns(ImmutableList.of(readColumn))
            .withSession(parquetPageSourcePushdown)
            .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT));
    assertThatFileFormat(AVRO)
            .withWriteColumns(ImmutableList.of(writeColumn))
            .withReadColumns(ImmutableList.of(readColumn))
            .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(SEQUENCEFILE)
            .withWriteColumns(ImmutableList.of(writeColumn))
            .withReadColumns(ImmutableList.of(readColumn))
            .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
    assertThatFileFormat(TEXTFILE)
            .withWriteColumns(ImmutableList.of(writeColumn))
            .withReadColumns(ImmutableList.of(readColumn))
            .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
}
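The write/read column pair is the only moving part in this test, so the same pattern extends to any pair of varchar widths. A minimal sketch following the constructors above; the varchar(10)/varchar(5) widths and the values are invented for illustration:

TestColumn wider = new TestColumn(
        "varchar_column",
        getPrimitiveJavaObjectInspector(new VarcharTypeInfo(10)),
        new HiveVarchar("0123456789", 10),
        utf8Slice("0123456789"));
TestColumn narrower = new TestColumn(
        "varchar_column",
        getPrimitiveJavaObjectInspector(new VarcharTypeInfo(5)),
        new HiveVarchar("01234", 5),
        utf8Slice("01234"));
// Writing through `wider` and reading through `narrower` should truncate the
// value to "01234", mirroring the varchar(4) -> varchar(3) case above.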
Use of com.facebook.presto.hive.parquet.ParquetPageSourceFactory in project presto by prestodb.
From the class TestHiveFileFormats, method testFailForLongVarcharPartitionColumn.
@Test
public void testFailForLongVarcharPartitionColumn()
        throws Exception
{
    // The partition value "test" does not fit the declared varchar(3)
    // partition key, so every format must fail with HIVE_INVALID_PARTITION_VALUE.
    TestColumn partitionColumn = new TestColumn("partition_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)), "test", utf8Slice("tes"), true);
    TestColumn varcharColumn = new TestColumn("varchar_column", getPrimitiveJavaObjectInspector(new VarcharTypeInfo(3)), new HiveVarchar("tes", 3), utf8Slice("tes"));
    List<TestColumn> columns = ImmutableList.of(partitionColumn, varcharColumn);
    HiveErrorCode expectedErrorCode = HiveErrorCode.HIVE_INVALID_PARTITION_VALUE;
    String expectedMessage = "Invalid partition value 'test' for varchar(3) partition key: partition_column";
    assertThatFileFormat(RCTEXT)
            .withColumns(columns)
            .isFailingForRecordCursor(new ColumnarTextHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage)
            .isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(RCBINARY)
            .withColumns(columns)
            .isFailingForRecordCursor(new ColumnarBinaryHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage)
            .isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(ORC)
            .withColumns(columns)
            .isFailingForPageSource(new OrcPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(PARQUET)
            .withColumns(columns)
            .withSession(parquetCursorSession)
            .isFailingForRecordCursor(new ParquetRecordCursorProvider(false, HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(PARQUET)
            .withColumns(columns)
            .withSession(parquetCursorPushdownSession)
            .isFailingForRecordCursor(new ParquetRecordCursorProvider(false, HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(PARQUET)
            .withColumns(columns)
            .withSession(parquetPageSourceSession)
            .isFailingForPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(PARQUET)
            .withColumns(columns)
            .withSession(parquetPageSourcePushdown)
            .isFailingForPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(SEQUENCEFILE)
            .withColumns(columns)
            .isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
    assertThatFileFormat(TEXTFILE)
            .withColumns(columns)
            .isFailingForRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);
}
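Note that the expected message is simply the oversized raw value interpolated with the declared partition type and column name, and that, unlike testTruncateVarcharColumn above, AVRO is not exercised here. A hedged sketch of composing an equivalent message with plain String.format, where the literals mirror the test's own:

String message = String.format(
        "Invalid partition value '%s' for %s partition key: %s",
        "test", "varchar(3)", "partition_column");
// message now equals the expectedMessage literal used in the test above.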
Use of com.facebook.presto.hive.parquet.ParquetPageSourceFactory in project presto by prestodb.
From the class TestHiveFileFormats, method testParquetPageSourceSchemaEvolution.
@Test(dataProvider = "rowCount")
public void testParquetPageSourceSchemaEvolution(int rowCount)
        throws Exception
{
    List<TestColumn> writeColumns = getTestColumnsSupportedByParquet();

    // test index-based access
    boolean useParquetColumnNames = false;
    List<TestColumn> readColumns = writeColumns.stream()
            .map(column -> new TestColumn(
                    column.getName() + "_new",
                    column.getObjectInspector(),
                    column.getWriteValue(),
                    column.getExpectedValue(),
                    column.isPartitionKey()))
            .collect(toList());
    assertThatFileFormat(PARQUET)
            .withWriteColumns(writeColumns)
            .withReadColumns(readColumns)
            .withSession(parquetPageSourceSession)
            .withRowsCount(rowCount)
            .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, useParquetColumnNames, HDFS_ENVIRONMENT));

    // test name-based access
    useParquetColumnNames = true;
    readColumns = Lists.reverse(writeColumns);
    assertThatFileFormat(PARQUET)
            .withWriteColumns(writeColumns)
            .withReadColumns(readColumns)
            .withSession(parquetPageSourceSession)
            .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, useParquetColumnNames, HDFS_ENVIRONMENT));
}
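The only knob that changes between the two halves of this test is the boolean passed to the ParquetPageSourceFactory constructor. A minimal sketch of the two configurations, with arguments taken verbatim from the calls above:

// Index-based resolution: columns are matched by position, so the renamed
// ("_new") read columns still line up with what was written.
ParquetPageSourceFactory byIndex = new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT);
// Name-based resolution: columns are matched by name, so the reversed read
// order still resolves correctly.
ParquetPageSourceFactory byName = new ParquetPageSourceFactory(TYPE_MANAGER, true, HDFS_ENVIRONMENT);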
Use of com.facebook.presto.hive.parquet.ParquetPageSourceFactory in project presto by prestodb.
From the class TestHiveFileFormats, method testParquetPageSource.
@Test(dataProvider = "rowCount")
public void testParquetPageSource(int rowCount)
        throws Exception
{
    List<TestColumn> testColumns = getTestColumnsSupportedByParquet();
    assertThatFileFormat(PARQUET)
            .withColumns(testColumns)
            .withSession(parquetPageSourceSession)
            .withRowsCount(rowCount)
            .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT));
    assertThatFileFormat(PARQUET)
            .withColumns(testColumns)
            .withSession(parquetPageSourcePushdown)
            .withRowsCount(rowCount)
            .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT));
}
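The two assertions differ only in the session: parquetPageSourceSession versus parquetPageSourcePushdown, where the latter, judging by its name, presumably enables Parquet predicate pushdown. A sketch making the shared structure explicit; the loop is illustrative rather than the original code, and ConnectorSession is assumed as the session type:

for (ConnectorSession session : ImmutableList.of(parquetPageSourceSession, parquetPageSourcePushdown)) {
    assertThatFileFormat(PARQUET)
            .withColumns(testColumns)
            .withSession(session)
            .withRowsCount(rowCount)
            .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT));
}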