Use of com.facebook.presto.hive.HiveTestUtils.FUNCTION_RESOLUTION in project presto by prestodb.
Class TestHiveFileFormats, method testDwrfOptimizedWriter:
@Test(dataProvider = "rowCount")
public void testDwrfOptimizedWriter(int rowCount) throws Exception {
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(
            new HiveClientConfig()
                    .setOrcOptimizedWriterEnabled(true)
                    .setOrcWriterValidationPercentage(100.0),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig(),
            new CacheConfig()).getSessionProperties());

    // DWRF does not support modern Hive types.
    // A Presto page cannot contain a map with null keys, so a page-based writer cannot write null keys.
    List<TestColumn> testColumns = TEST_COLUMNS.stream()
            .filter(testColumn -> !hasType(testColumn.getObjectInspector(), PrimitiveCategory.DATE, PrimitiveCategory.VARCHAR, PrimitiveCategory.CHAR, PrimitiveCategory.DECIMAL))
            .filter(testColumn -> !testColumn.getName().equals("t_map_null_key")
                    && !testColumn.getName().equals("t_map_null_key_complex_value")
                    && !testColumn.getName().equals("t_map_null_key_complex_key_value"))
            .collect(toList());

    assertThatFileFormat(DWRF)
            .withColumns(testColumns)
            .withRowsCount(rowCount)
            .withSession(session)
            .withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, new OutputStreamDataSinkFactory(), FUNCTION_AND_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS, new OrcFileWriterConfig(), NO_ENCRYPTION))
            .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT))
            .isReadableByPageSource(new DwrfBatchPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, HIVE_CLIENT_CONFIG, HDFS_ENVIRONMENT, STATS, new StorageOrcFileTailSource(), StripeMetadataSourceFactory.of(new StorageStripeMetadataSource()), NO_ENCRYPTION));
}
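The hasType filter above recursively checks an ObjectInspector tree for any of the listed primitive categories. A minimal sketch of such a helper, written against the standard Hive ObjectInspector API; the actual implementation in TestHiveFileFormats may differ:

import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

// Hypothetical sketch: returns true if the inspector, or any nested inspector,
// has one of the given primitive categories. Union and other inspector
// categories are omitted here for brevity.
private static boolean hasType(ObjectInspector objectInspector, PrimitiveCategory... categories) {
    if (objectInspector instanceof PrimitiveObjectInspector) {
        PrimitiveCategory category = ((PrimitiveObjectInspector) objectInspector).getPrimitiveCategory();
        for (PrimitiveCategory candidate : categories) {
            if (candidate == category) {
                return true;
            }
        }
        return false;
    }
    if (objectInspector instanceof ListObjectInspector) {
        return hasType(((ListObjectInspector) objectInspector).getListElementObjectInspector(), categories);
    }
    if (objectInspector instanceof MapObjectInspector) {
        MapObjectInspector mapInspector = (MapObjectInspector) objectInspector;
        return hasType(mapInspector.getMapKeyObjectInspector(), categories)
                || hasType(mapInspector.getMapValueObjectInspector(), categories);
    }
    if (objectInspector instanceof StructObjectInspector) {
        for (StructField field : ((StructObjectInspector) objectInspector).getAllStructFieldRefs()) {
            if (hasType(field.getFieldObjectInspector(), categories)) {
                return true;
            }
        }
        return false;
    }
    return false;
}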
Use of com.facebook.presto.hive.HiveTestUtils.FUNCTION_RESOLUTION in project presto by prestodb.
Class TestHiveFileFormats, method testOrcOptimizedWriter:
@Test(dataProvider = "rowCount")
public void testOrcOptimizedWriter(int rowCount) throws Exception {
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(
            new HiveClientConfig()
                    .setOrcOptimizedWriterEnabled(true)
                    .setOrcWriterValidationPercentage(100.0),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig(),
            new CacheConfig()).getSessionProperties());

    // A Presto page cannot contain a map with null keys, so a page-based writer cannot write null keys.
    List<TestColumn> testColumns = TEST_COLUMNS.stream()
            .filter(testColumn -> !testColumn.getName().equals("t_map_null_key")
                    && !testColumn.getName().equals("t_map_null_key_complex_value")
                    && !testColumn.getName().equals("t_map_null_key_complex_key_value"))
            .collect(toList());

    assertThatFileFormat(ORC)
            .withColumns(testColumns)
            .withRowsCount(rowCount)
            .withSession(session)
            .withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, new OutputStreamDataSinkFactory(), FUNCTION_AND_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS, new OrcFileWriterConfig(), NO_ENCRYPTION))
            .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT))
            .isReadableByPageSource(new OrcBatchPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, false, HDFS_ENVIRONMENT, STATS, 100, new StorageOrcFileTailSource(), StripeMetadataSourceFactory.of(new StorageStripeMetadataSource())));
}
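Both writer tests are parameterized by a TestNG data provider named "rowCount", which is not shown in these excerpts. A minimal sketch of what such a provider typically looks like; the row counts below are illustrative, not the values used in TestHiveFileFormats:

import org.testng.annotations.DataProvider;

// Hypothetical sketch of a TestNG data provider supplying row counts to the writer tests.
@DataProvider(name = "rowCount")
public static Object[][] rowCountProvider() {
    return new Object[][] {{0}, {1}, {1000}, {10_000}};
}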
Use of com.facebook.presto.hive.HiveTestUtils.FUNCTION_RESOLUTION in project presto by prestodb.
Class AbstractTestHiveClient, method doTestMismatchSchemaTable:
protected void doTestMismatchSchemaTable(
        SchemaTableName schemaTableName,
        HiveStorageFormat storageFormat,
        List<ColumnMetadata> tableBefore,
        MaterializedResult dataBefore,
        List<ColumnMetadata> tableAfter,
        MaterializedResult dataAfter,
        List<RowExpression> afterFilters,
        List<Predicate<MaterializedRow>> afterResultPredicates) throws Exception {
    String schemaName = schemaTableName.getSchemaName();
    String tableName = schemaTableName.getTableName();

    doCreateEmptyTable(schemaTableName, storageFormat, tableBefore);

    // insert the data
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName);
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TEST_HIVE_PAGE_SINK_CONTEXT);

        sink.appendPage(dataBefore.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());

        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        transaction.commit();
    }

    // load the table and verify the data
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName);
        List<ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream()
                .filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden())
                .collect(toList());

        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), dataBefore.getMaterializedRows());
        transaction.commit();
    }

    // alter the table schema
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        MetastoreContext metastoreContext = new MetastoreContext(session.getIdentity(), session.getQueryId(), session.getClientInfo(), session.getSource(), getMetastoreHeaders(session), false, DEFAULT_COLUMN_CONVERTER_PROVIDER);
        PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(session);
        Table oldTable = transaction.getMetastore().getTable(metastoreContext, schemaName, tableName).get();
        HiveTypeTranslator hiveTypeTranslator = new HiveTypeTranslator();
        List<Column> dataColumns = tableAfter.stream()
                .filter(columnMetadata -> !columnMetadata.getName().equals("ds"))
                .map(columnMetadata -> new Column(columnMetadata.getName(), toHiveType(hiveTypeTranslator, columnMetadata.getType()), Optional.empty(), Optional.empty()))
                .collect(toList());
        Table.Builder newTable = Table.builder(oldTable)
                .setDataColumns(dataColumns);

        transaction.getMetastore().replaceView(metastoreContext, schemaName, tableName, newTable.build(), principalPrivileges);
        transaction.commit();
    }

    // load the altered table and verify the data
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName);
        List<ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream()
                .filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden())
                .collect(toList());

        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), dataAfter.getMaterializedRows());

        int filterCount = afterFilters.size();
        for (int i = 0; i < filterCount; i++) {
            RowExpression predicate = afterFilters.get(i);
            ConnectorTableLayoutHandle layoutHandle = pushdownFilter(
                    session,
                    metadata,
                    transaction.getMetastore(),
                    ROW_EXPRESSION_SERVICE,
                    FUNCTION_RESOLUTION,
                    hivePartitionManager,
                    METADATA.getFunctionAndTypeManager(),
                    tableHandle,
                    predicate,
                    Optional.empty()).getLayout().getHandle();

            // Read all columns with a filter
            MaterializedResult filteredResult = readTable(transaction, tableHandle, layoutHandle, columnHandles, session, OptionalInt.empty(), Optional.empty());
            Predicate<MaterializedRow> rowPredicate = afterResultPredicates.get(i);
            List<MaterializedRow> expectedRows = dataAfter.getMaterializedRows().stream()
                    .filter(rowPredicate::apply)
                    .collect(toList());
            assertEqualsIgnoreOrder(filteredResult.getMaterializedRows(), expectedRows);

            // Read all columns except the ones used in the filter
            Set<String> filterColumnNames = extractUnique(predicate).stream()
                    .map(VariableReferenceExpression::getName)
                    .collect(toImmutableSet());
            List<ColumnHandle> nonFilterColumns = columnHandles.stream()
                    .filter(column -> !filterColumnNames.contains(((HiveColumnHandle) column).getName()))
                    .collect(toList());
            int resultCount = readTable(transaction, tableHandle, layoutHandle, nonFilterColumns, session, OptionalInt.empty(), Optional.empty()).getRowCount();
            assertEquals(resultCount, expectedRows.size());
        }
        transaction.commit();
    }

    // insertions to the partitions with type mismatches should fail
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName);
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, TEST_HIVE_PAGE_SINK_CONTEXT);

        sink.appendPage(dataAfter.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());

        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        transaction.commit();
        fail("expected exception");
    }
    catch (PrestoException e) {
        // expected
        assertEquals(e.getErrorCode(), HIVE_PARTITION_SCHEMA_MISMATCH.toErrorCode());
    }
}
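A concrete test supplies the before/after schemas, data, and optional pushed-down filters. The sketch below is purely illustrative: the test name, schema, column names, types, and values are made up, it assumes the imports and static type constants (INTEGER, BIGINT, VARCHAR, ImmutableList) already available in AbstractTestHiveClient subclasses, and it assumes the harness treats the "ds" column as the partition column.

// Hypothetical caller: widen a column from INTEGER to BIGINT and exercise the mismatch handling.
@Test
public void testMismatchSchemaTableExample() throws Exception {
    ConnectorSession session = newSession();

    List<ColumnMetadata> before = ImmutableList.of(
            new ColumnMetadata("id", INTEGER),
            new ColumnMetadata("ds", VARCHAR));
    List<ColumnMetadata> after = ImmutableList.of(
            new ColumnMetadata("id", BIGINT),
            new ColumnMetadata("ds", VARCHAR));

    MaterializedResult dataBefore = MaterializedResult.resultBuilder(session, INTEGER, VARCHAR)
            .row(1, "2020-01-01")
            .build();
    MaterializedResult dataAfter = MaterializedResult.resultBuilder(session, BIGINT, VARCHAR)
            .row(1L, "2020-01-01")
            .build();

    doTestMismatchSchemaTable(
            new SchemaTableName("test_schema", "test_mismatch_example"),
            HiveStorageFormat.ORC,
            before,
            dataBefore,
            after,
            dataAfter,
            ImmutableList.of(),   // no pushed-down filters in this sketch
            ImmutableList.of());  // so no result predicates either
}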
Use of com.facebook.presto.hive.HiveTestUtils.FUNCTION_RESOLUTION in project presto by prestodb.
Class TestHiveFileFormats, method testParquetPageSourceSchemaEvolution:
@Test(dataProvider = "rowCount")
public void testParquetPageSourceSchemaEvolution(int rowCount) throws Exception {
    List<TestColumn> writeColumns = getTestColumnsSupportedByParquet();

    // test index-based access
    List<TestColumn> readColumns = writeColumns.stream()
            .map(column -> new TestColumn(column.getName() + "_new", column.getObjectInspector(), column.getWriteValue(), column.getExpectedValue(), column.isPartitionKey()))
            .collect(toList());
    assertThatFileFormat(PARQUET)
            .withWriteColumns(writeColumns)
            .withReadColumns(readColumns)
            .withSession(parquetPageSourceSession)
            .withRowsCount(rowCount)
            .isReadableByPageSource(new ParquetPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, HDFS_ENVIRONMENT, STATS, METADATA_READER));

    // test name-based access
    readColumns = Lists.reverse(writeColumns);
    assertThatFileFormat(PARQUET)
            .withWriteColumns(writeColumns)
            .withReadColumns(readColumns)
            .withSession(parquetPageSourceSessionUseName)
            .isReadableByPageSource(new ParquetPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, HDFS_ENVIRONMENT, STATS, METADATA_READER));
}
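The two sessions differ only in whether Parquet columns are resolved by position or by name. They are defined elsewhere in the class; a plausible construction of the name-based one is sketched below. The setUseParquetColumnNames setter is an assumption about the HiveClientConfig toggle, and the actual field in TestHiveFileFormats may be built differently.

// Hypothetical sketch: a session with Parquet name-based column access enabled.
private static final TestingConnectorSession parquetPageSourceSessionUseName = new TestingConnectorSession(
        new HiveSessionProperties(
                new HiveClientConfig().setUseParquetColumnNames(true), // assumed setter name
                new OrcFileWriterConfig(),
                new ParquetFileWriterConfig(),
                new CacheConfig()).getSessionProperties());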
Use of com.facebook.presto.hive.HiveTestUtils.FUNCTION_RESOLUTION in project presto by prestodb.
Class TestHiveFileFormats, method testOptimizedParquetWriter:
@Test(dataProvider = "rowCount")
public void testOptimizedParquetWriter(int rowCount) throws Exception {
    TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(
            new HiveClientConfig()
                    .setOrcOptimizedWriterEnabled(true)
                    .setOrcWriterValidationPercentage(100.0),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig()
                    .setParquetOptimizedWriterEnabled(true),
            new CacheConfig()).getSessionProperties());

    // A Presto page cannot contain a map with null keys, so a page-based writer cannot write null keys.
    List<TestColumn> testColumns = TEST_COLUMNS.stream()
            .filter(testColumn -> !testColumn.getName().equals("t_map_null_key")
                    && !testColumn.getName().equals("t_map_null_key_complex_value")
                    && !testColumn.getName().equals("t_map_null_key_complex_key_value"))
            .collect(toList());

    assertThatFileFormat(PARQUET)
            .withSession(session)
            .withColumns(testColumns)
            .withRowsCount(rowCount)
            .withFileWriterFactory(new ParquetFileWriterFactory(HDFS_ENVIRONMENT, FUNCTION_AND_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE))
            .isReadableByPageSource(new ParquetPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, HDFS_ENVIRONMENT, STATS, METADATA_READER));
}
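The same three map-with-null-key columns are excluded in each of the writer tests above, so the filter could be factored into a small helper. A minimal sketch, assuming the TestColumn type used throughout this class; such a helper does not exist in TestHiveFileFormats itself.

import com.google.common.collect.ImmutableSet;
import java.util.List;
import java.util.Set;
import static java.util.stream.Collectors.toList;

// Hypothetical helper: drops the columns whose maps contain null keys,
// which a page-based writer cannot produce.
private static final Set<String> NULL_KEY_MAP_COLUMNS = ImmutableSet.of(
        "t_map_null_key",
        "t_map_null_key_complex_value",
        "t_map_null_key_complex_key_value");

private static List<TestColumn> withoutNullKeyMapColumns(List<TestColumn> columns) {
    return columns.stream()
            .filter(column -> !NULL_KEY_MAP_COLUMNS.contains(column.getName()))
            .collect(toList());
}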