Use of com.facebook.presto.spi.connector.ConnectorPartitioningMetadata in project presto by prestodb.
The class AbstractTestHiveClient, method testCreateTemporaryTable.
private void testCreateTemporaryTable(List<ColumnMetadata> columns, int bucketCount, List<String> bucketingColumns, MaterializedResult inputRows, ConnectorSession session, boolean commit)
        throws Exception
{
    List<Path> insertLocations = new ArrayList<>();
    HiveTableHandle tableHandle;
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        // prepare temporary table schema
        List<Type> types = columns.stream()
                .map(ColumnMetadata::getType)
                .collect(toImmutableList());
        ConnectorPartitioningMetadata partitioning = new ConnectorPartitioningMetadata(
                metadata.getPartitioningHandleForExchange(session, bucketCount, types),
                bucketingColumns);
        // create temporary table
        tableHandle = (HiveTableHandle) metadata.createTemporaryTable(session, columns, Optional.of(partitioning));
        // begin insert into temporary table
        HiveInsertTableHandle firstInsert = (HiveInsertTableHandle) metadata.beginInsert(session, tableHandle);
        insertLocations.add(firstInsert.getLocationHandle().getTargetPath());
        insertLocations.add(firstInsert.getLocationHandle().getWritePath());
        // insert into temporary table
        ConnectorPageSink firstSink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, firstInsert, TEST_HIVE_PAGE_SINK_CONTEXT);
        firstSink.appendPage(inputRows.toPage());
        Collection<Slice> firstFragments = getFutureValue(firstSink.finish());
        if (inputRows.getRowCount() == 0) {
            assertThat(firstFragments).isEmpty();
        }
        // begin second insert into temporary table
        HiveInsertTableHandle secondInsert = (HiveInsertTableHandle) metadata.beginInsert(session, tableHandle);
        insertLocations.add(secondInsert.getLocationHandle().getTargetPath());
        insertLocations.add(secondInsert.getLocationHandle().getWritePath());
        // insert into temporary table
        ConnectorPageSink secondSink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, secondInsert, TEST_HIVE_PAGE_SINK_CONTEXT);
        secondSink.appendPage(inputRows.toPage());
        Collection<Slice> secondFragments = getFutureValue(secondSink.finish());
        if (inputRows.getRowCount() == 0) {
            assertThat(secondFragments).isEmpty();
        }
        // finish only the second insert; the first is deliberately left unfinished
        metadata.finishInsert(session, secondInsert, secondFragments, ImmutableList.of());
        // empty buckets produce no splits when zero-row files are not created
        assertLessThanOrEqual(getAllSplits(transaction, tableHandle, TupleDomain.all()).size(), bucketCount);
        // verify written data
        Map<String, ColumnHandle> allColumnHandles = metadata.getColumnHandles(session, tableHandle);
        List<ColumnHandle> dataColumnHandles = columns.stream()
                .map(ColumnMetadata::getName)
                .map(allColumnHandles::get)
                .collect(toImmutableList());
        // check that all columns are regular columns (not partition columns)
        dataColumnHandles.stream()
                .map(HiveColumnHandle.class::cast)
                .forEach(handle -> {
                    if (handle.isPartitionKey()) {
                        fail("partitioning column found: " + handle.getName());
                    }
                });
        MaterializedResult outputRows = readTable(transaction, tableHandle, dataColumnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(inputRows.getMaterializedRows(), outputRows.getMaterializedRows());
        if (commit) {
            transaction.commit();
        }
        else {
            transaction.rollback();
        }
    }
    // the temporary table is gone once the transaction ends, whether committed or rolled back
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        assertThatThrownBy(() -> metadata.getColumnHandles(session, tableHandle))
                .isInstanceOf(TableNotFoundException.class);
    }
    // verify that all write and target locations were cleaned up
    HdfsContext context = new HdfsContext(session, tableHandle.getSchemaName(), tableHandle.getTableName(), "test_path", false);
    for (Path location : insertLocations) {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, location);
        assertFalse(fileSystem.exists(location));
    }
}
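Stripped of the test harness, the pattern this test exercises is compact: ask the connector metadata for a partitioning handle sized to the desired bucket count, pair it with the bucketing column names in a ConnectorPartitioningMetadata, and hand both to createTemporaryTable. A minimal sketch of that pattern follows; the helper name, columns, and bucket count are illustrative, not part of Presto:

// Hypothetical helper distilling the pattern above; assumes the Presto SPI is on the classpath.
private static ConnectorTableHandle createBucketedTemporaryTable(ConnectorMetadata metadata, ConnectorSession session)
{
    // illustrative two-column schema
    List<ColumnMetadata> columns = ImmutableList.of(
            new ColumnMetadata("id", VARCHAR),
            new ColumnMetadata("value", VARCHAR));
    List<Type> types = columns.stream()
            .map(ColumnMetadata::getType)
            .collect(toImmutableList());
    int bucketCount = 8; // illustrative
    // connector-provided handle describing how rows are hashed into bucketCount buckets
    ConnectorPartitioningHandle handle = metadata.getPartitioningHandleForExchange(session, bucketCount, types);
    // pair the handle with the names of the columns the bucket function reads
    ConnectorPartitioningMetadata partitioning = new ConnectorPartitioningMetadata(handle, ImmutableList.of("id"));
    return metadata.createTemporaryTable(session, columns, Optional.of(partitioning));
}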
Use of com.facebook.presto.spi.connector.ConnectorPartitioningMetadata in project presto by prestodb.
The class HiveMetadata, method createTemporaryTable.
@Override
public ConnectorTableHandle createTemporaryTable(ConnectorSession session, List<ColumnMetadata> columns, Optional<ConnectorPartitioningMetadata> partitioningMetadata)
{
    String schemaName = getTemporaryTableSchema(session);
    HiveStorageFormat storageFormat = getTemporaryTableStorageFormat(session);
    Optional<HiveBucketProperty> bucketProperty = partitioningMetadata.map(partitioning -> {
        Set<String> allColumns = columns.stream()
                .map(ColumnMetadata::getName)
                .collect(toImmutableSet());
        if (!allColumns.containsAll(partitioning.getPartitionColumns())) {
            throw new PrestoException(
                    INVALID_TABLE_PROPERTY,
                    format("Bucketing columns %s not present in schema", Sets.difference(ImmutableSet.copyOf(partitioning.getPartitionColumns()), allColumns)));
        }
        HivePartitioningHandle partitioningHandle = (HivePartitioningHandle) partitioning.getPartitioningHandle();
        List<String> partitionColumns = partitioning.getPartitionColumns();
        BucketFunctionType bucketFunctionType = partitioningHandle.getBucketFunctionType();
        switch (bucketFunctionType) {
            case HIVE_COMPATIBLE:
                return new HiveBucketProperty(partitionColumns, partitioningHandle.getBucketCount(), ImmutableList.of(), HIVE_COMPATIBLE, Optional.empty());
            case PRESTO_NATIVE:
                Map<String, Type> columnNameToTypeMap = columns.stream()
                        .collect(toMap(ColumnMetadata::getName, ColumnMetadata::getType));
                return new HiveBucketProperty(
                        partitionColumns,
                        partitioningHandle.getBucketCount(),
                        ImmutableList.of(),
                        PRESTO_NATIVE,
                        Optional.of(partitionColumns.stream().map(columnNameToTypeMap::get).collect(toImmutableList())));
            default:
                throw new IllegalArgumentException("Unsupported bucket function type " + bucketFunctionType);
        }
    });
    if (isUsePageFileForHiveUnsupportedType(session)) {
        if (!columns.stream().map(ColumnMetadata::getType).allMatch(HiveTypeTranslator::isSupportedHiveType)) {
            storageFormat = PAGEFILE;
        }
    }
    // PAGEFILE format doesn't require translation to a Hive type;
    // choose HIVE_BINARY as the default hive type to keep it compatible with the Hive connector
    Optional<HiveType> defaultHiveType = storageFormat == PAGEFILE ? Optional.of(HIVE_BINARY) : Optional.empty();
    List<HiveColumnHandle> columnHandles = getColumnHandles(
            // translate types unsupported by Hive (e.g., the unknown type) to a binary compatible type
            translateHiveUnsupportedTypesForTemporaryTable(columns, typeManager),
            ImmutableSet.of(),
            typeTranslator,
            defaultHiveType);
    validateColumns(storageFormat, columnHandles);
    HiveStorageFormat finalStorageFormat = storageFormat;
    String tableName = PRESTO_TEMPORARY_TABLE_NAME_PREFIX + finalStorageFormat.name() + "_" + session.getQueryId().replaceAll("-", "_") + "_" + randomUUID().toString().replaceAll("-", "_");
    Table table = Table.builder()
            .setDatabaseName(schemaName)
            .setTableName(tableName)
            .setOwner(session.getUser())
            .setTableType(TEMPORARY_TABLE)
            .setDataColumns(columnHandles.stream()
                    .map(handle -> new Column(handle.getName(), handle.getHiveType(), handle.getComment(), Optional.empty()))
                    .collect(toImmutableList()))
            .withStorage(storage -> storage
                    .setStorageFormat(fromHiveStorageFormat(finalStorageFormat))
                    .setBucketProperty(bucketProperty)
                    .setLocation(""))
            .build();
    List<String> partitionColumnNames = table.getPartitionColumns().stream()
            .map(Column::getName)
            .collect(toImmutableList());
    List<HiveColumnHandle> hiveColumnHandles = hiveColumnHandles(table);
    Map<String, Type> columnTypes = hiveColumnHandles.stream()
            .filter(columnHandle -> !columnHandle.isHidden())
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<String, Set<ColumnStatisticType>> columnStatisticTypes = hiveColumnHandles.stream()
            .filter(columnHandle -> !partitionColumnNames.contains(columnHandle.getName()))
            .filter(column -> !column.isHidden())
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> ImmutableSet.copyOf(getSupportedColumnStatisticsForTemporaryTable(typeManager.getType(column.getTypeSignature())))));
    metastore.createTable(session, table, buildInitialPrivilegeSet(table.getOwner()), Optional.empty(), false, createEmptyPartitionStatistics(columnTypes, columnStatisticTypes));
    return new HiveTableHandle(schemaName, tableName);
}
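Note how the method validates the requested bucketing columns against the declared schema before building the HiveBucketProperty, using Sets.difference so the error names exactly the unknown columns. The same check in isolation, with illustrative column sets (not Presto code):

// illustrative stand-ins for the table schema and the requested bucketing columns
Set<String> tableColumns = ImmutableSet.of("id", "value");
Set<String> bucketingColumns = ImmutableSet.of("id", "missing_column");
if (!tableColumns.containsAll(bucketingColumns)) {
    // Sets.difference reports only the offending names, here [missing_column]
    throw new PrestoException(
            INVALID_TABLE_PROPERTY,
            format("Bucketing columns %s not present in schema", Sets.difference(bucketingColumns, tableColumns)));
}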
Use of com.facebook.presto.spi.connector.ConnectorPartitioningMetadata in project presto by prestodb.
The class MetadataManager, method createConnectorPartitioningMetadata.
private static ConnectorPartitioningMetadata createConnectorPartitioningMetadata(ConnectorId connectorId, PartitioningMetadata partitioningMetadata)
{
    ConnectorId partitioningConnectorId = partitioningMetadata.getPartitioningHandle()
            .getConnectorId()
            .orElseThrow(() -> new IllegalArgumentException("connectorId is expected to be present in the connector partitioning handle"));
    checkArgument(connectorId.equals(partitioningConnectorId), "Unexpected partitioning handle connector: %s. Expected: %s.", partitioningConnectorId, connectorId);
    return new ConnectorPartitioningMetadata(partitioningMetadata.getPartitioningHandle().getConnectorHandle(), partitioningMetadata.getPartitionColumns());
}
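After the connector-id check, the ConnectorPartitioningMetadata handed to the connector is a plain value pair: the connector's own partitioning handle plus the bucketing column names. A self-contained sketch of constructing and reading one back; the anonymous handle is an illustrative stub (it should compile because the SPI interface's methods are defaulted), not something the engine would create:

// illustrative stub; real connectors supply their own ConnectorPartitioningHandle implementation
ConnectorPartitioningHandle stubHandle = new ConnectorPartitioningHandle() {};
ConnectorPartitioningMetadata partitioning = new ConnectorPartitioningMetadata(stubHandle, ImmutableList.of("id"));
// the connector reads the same two pieces back out, as HiveMetadata.createTemporaryTable does above
ConnectorPartitioningHandle handle = partitioning.getPartitioningHandle();
List<String> bucketingColumns = partitioning.getPartitionColumns();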
Use of com.facebook.presto.spi.connector.ConnectorPartitioningMetadata in project presto by prestodb.
The class AbstractTestHiveClient, method testCreateBucketedTemporaryTableWithMissingBuckets.
@Test
public void testCreateBucketedTemporaryTableWithMissingBuckets()
{
    List<ColumnMetadata> columns = TEMPORARY_TABLE_COLUMNS;
    List<String> bucketingColumns = TEMPORARY_TABLE_BUCKET_COLUMNS;
    int bucketCount = TEMPORARY_TABLE_BUCKET_COUNT;
    MaterializedResult singleRow = MaterializedResult.resultBuilder(SESSION, VARCHAR, VARCHAR)
            .row("1", "value1")
            .build();
    ConnectorSession session = newSession();
    HiveTableHandle tableHandle;
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        // prepare temporary table schema
        List<Type> types = columns.stream()
                .map(ColumnMetadata::getType)
                .collect(toImmutableList());
        ConnectorPartitioningMetadata partitioning = new ConnectorPartitioningMetadata(
                metadata.getPartitioningHandleForExchange(session, bucketCount, types),
                bucketingColumns);
        // create temporary table
        tableHandle = (HiveTableHandle) metadata.createTemporaryTable(session, columns, Optional.of(partitioning));
        // begin insert into temporary table
        HiveInsertTableHandle insert = (HiveInsertTableHandle) metadata.beginInsert(session, tableHandle);
        // insert into temporary table
        ConnectorPageSink firstSink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insert, TEST_HIVE_PAGE_SINK_CONTEXT);
        firstSink.appendPage(singleRow.toPage());
        Collection<Slice> fragments = getFutureValue(firstSink.finish());
        if (singleRow.getRowCount() == 0) {
            // vacuous here (singleRow always has one row); kept for parity with the other temporary table tests
            assertThat(fragments).isEmpty();
        }
        // finish insert
        metadata.finishInsert(session, insert, fragments, ImmutableList.of());
        // only one split, since only one bucket is non-empty
        assertEquals(getAllSplits(transaction, tableHandle, TupleDomain.all()).size(), 1);
        transaction.rollback();
    }
}
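The single-row input above is built with the same MaterializedResult builder the other temporary-table tests use; a multi-row input is built the same way, and the page sink consumes it one Page at a time. A small sketch with illustrative values, reusing SESSION and VARCHAR from the test:

// build a two-column, three-row input
MaterializedResult rows = MaterializedResult.resultBuilder(SESSION, VARCHAR, VARCHAR)
        .row("1", "value1")
        .row("2", "value2")
        .row("3", "value3")
        .build();
// a single Page carrying all three rows, ready for ConnectorPageSink.appendPage
Page page = rows.toPage();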