Use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.
The class GlueToPrestoConverter, method convertPartition:
public static Partition convertPartition(com.amazonaws.services.glue.model.Partition gluePartition)
{
    requireNonNull(gluePartition.getStorageDescriptor(), "Partition StorageDescriptor is null");
    StorageDescriptor sd = gluePartition.getStorageDescriptor();

    Partition.Builder partitionBuilder = Partition.builder()
            .setDatabaseName(gluePartition.getDatabaseName())
            .setTableName(gluePartition.getTableName())
            .setValues(gluePartition.getValues())
            .setColumns(sd.getColumns().stream()
                    .map(GlueToPrestoConverter::convertColumn)
                    .collect(toList()))
            .setParameters(firstNonNull(gluePartition.getParameters(), ImmutableMap.of()));

    setStorageBuilder(sd, partitionBuilder.getStorageBuilder());

    return partitionBuilder.build();
}
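For context, a minimal sketch of how such a converter might be driven: fetch one partition from Glue with the AWS SDK for Java v1 (the same model package the snippet uses) and hand it to convertPartition. The client setup and the database, table, and value literals are illustrative assumptions, not code from hetu-core.

import com.amazonaws.services.glue.AWSGlue;
import com.amazonaws.services.glue.AWSGlueClientBuilder;
import com.amazonaws.services.glue.model.GetPartitionRequest;
import com.google.common.collect.ImmutableList;
import io.prestosql.plugin.hive.metastore.Partition;
import io.prestosql.plugin.hive.metastore.glue.converter.GlueToPrestoConverter;

public class ConvertPartitionExample
{
    public static void main(String[] args)
    {
        // Illustrative only: a default Glue client and hypothetical names
        AWSGlue glue = AWSGlueClientBuilder.defaultClient();
        com.amazonaws.services.glue.model.Partition gluePartition = glue.getPartition(
                new GetPartitionRequest()
                        .withDatabaseName("sales_db")
                        .withTableName("orders")
                        .withPartitionValues(ImmutableList.of("2020-01-01")))
                .getPartition();

        // Convert the Glue model object into the metastore-agnostic Partition
        Partition partition = GlueToPrestoConverter.convertPartition(gluePartition);
        System.out.println(partition.getValues());
    }
}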
Use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.
The class BridgingHiveMetastore, method getPartitionsByNames:
@Override
public Map<String, Optional<Partition>> getPartitionsByNames(HiveIdentity identity, String databaseName, String tableName, List<String> partitionNames)
{
    requireNonNull(partitionNames, "partitionNames is null");
    if (partitionNames.isEmpty()) {
        return ImmutableMap.of();
    }

    Function<org.apache.hadoop.hive.metastore.api.Partition, Partition> fromMetastoreApiPartition = ThriftMetastoreUtil::fromMetastoreApiPartition;

    boolean isAvroTableWithSchemaSet = delegate.getTable(identity, databaseName, tableName)
            .map(ThriftMetastoreUtil::isAvroTableWithSchemaSet)
            .orElse(false);
    if (isAvroTableWithSchemaSet) {
        List<FieldSchema> schema = delegate.getFields(identity, databaseName, tableName).get();
        fromMetastoreApiPartition = partition -> ThriftMetastoreUtil.fromMetastoreApiPartition(partition, schema);
    }

    Map<String, List<String>> partitionNameToPartitionValuesMap = partitionNames.stream()
            .collect(Collectors.toMap(identity(), HiveUtil::toPartitionValues));
    Map<List<String>, Partition> partitionValuesToPartitionMap = delegate.getPartitionsByNames(identity, databaseName, tableName, partitionNames).stream()
            .map(fromMetastoreApiPartition)
            .collect(Collectors.toMap(Partition::getValues, identity()));

    ImmutableMap.Builder<String, Optional<Partition>> resultBuilder = ImmutableMap.builder();
    for (Map.Entry<String, List<String>> entry : partitionNameToPartitionValuesMap.entrySet()) {
        Partition partition = partitionValuesToPartitionMap.get(entry.getValue());
        resultBuilder.put(entry.getKey(), Optional.ofNullable(partition));
    }
    return resultBuilder.build();
}
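The join key between the two maps is the partition value list, derived from the partition name. A toy illustration of that mapping (this stand-in just splits on '/' and '='; the real HiveUtil.toPartitionValues also unescapes encoded characters):

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class PartitionNameExample
{
    // Toy stand-in for HiveUtil.toPartitionValues; real code also unescapes values
    static List<String> toPartitionValues(String partitionName)
    {
        return Arrays.stream(partitionName.split("/"))
                .map(piece -> piece.substring(piece.indexOf('=') + 1))
                .collect(Collectors.toList());
    }

    public static void main(String[] args)
    {
        // Prints [2020-01-01, US]
        System.out.println(toPartitionValues("ds=2020-01-01/country=US"));
    }
}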
Use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.
The class ThriftMetastoreUtil, method fromMetastoreApiPartition:
public static Partition fromMetastoreApiPartition(org.apache.hadoop.hive.metastore.api.Partition partition, List<FieldSchema> schema)
{
    StorageDescriptor storageDescriptor = partition.getSd();
    if (storageDescriptor == null) {
        throw new PrestoException(HiveErrorCode.HIVE_INVALID_METADATA, "Partition does not contain a storage descriptor: " + partition);
    }

    Partition.Builder partitionBuilder = Partition.builder()
            .setDatabaseName(partition.getDbName())
            .setTableName(partition.getTableName())
            .setValues(partition.getValues())
            .setColumns(schema.stream()
                    .map(ThriftMetastoreUtil::fromMetastoreApiFieldSchema)
                    .collect(toList()))
            .setParameters(partition.getParameters());

    // TODO is bucketing_version set on partition level??
    fromMetastoreApiStorageDescriptor(partition.getParameters(), storageDescriptor, partitionBuilder.getStorageBuilder(), format("%s.%s", partition.getTableName(), partition.getValues()));

    return partitionBuilder.build();
}
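To see the input side of this conversion, here is a minimal sketch that builds a Thrift-model partition by hand using the standard Hive metastore API classes and converts it. All names and locations are illustrative assumptions:

import java.util.HashMap;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;

import com.google.common.collect.ImmutableList;

import io.prestosql.plugin.hive.metastore.thrift.ThriftMetastoreUtil;

public class ThriftPartitionExample
{
    public static void main(String[] args)
    {
        StorageDescriptor sd = new StorageDescriptor();
        sd.setCols(ImmutableList.of(new FieldSchema("id", "bigint", null)));
        sd.setLocation("hdfs://namenode/warehouse/orders/ds=2020-01-01");
        sd.setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
        sd.setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");
        sd.setSerdeInfo(new SerDeInfo("orders", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", new HashMap<>()));

        Partition partition = new Partition();
        partition.setDbName("sales_db");
        partition.setTableName("orders");
        partition.setValues(ImmutableList.of("2020-01-01"));
        // Omitting setSd would make the converter above throw HIVE_INVALID_METADATA
        partition.setSd(sd);
        partition.setParameters(new HashMap<>());

        io.prestosql.plugin.hive.metastore.Partition converted =
                ThriftMetastoreUtil.fromMetastoreApiPartition(partition, sd.getCols());
        System.out.println(converted.getValues());
    }
}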
Use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.
The class AbstractTestHive, method doInsertIntoNewPartition:
private void doInsertIntoNewPartition(HiveStorageFormat storageFormat, SchemaTableName tableName)
        throws Exception
{
    // creating the table
    doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);

    // insert the data
    String queryId = insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);

    Set<String> existingFiles;
    try (Transaction transaction = newTransaction()) {
        // verify partitions were created
        HiveIdentity identity = new HiveIdentity(newSession());
        List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName())
                .getPartitionNames(identity, tableName.getSchemaName(), tableName.getTableName())
                .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
        assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream()
                .map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1))
                .collect(toList()));

        // verify the node versions in partitions
        Map<String, Optional<Partition>> partitions = getMetastoreClient().getPartitionsByNames(identity, tableName.getSchemaName(), tableName.getTableName(), partitionNames);
        assertEquals(partitions.size(), partitionNames.size());
        for (String partitionName : partitionNames) {
            Partition partition = partitions.get(partitionName).get();
            assertEquals(partition.getParameters().get(PRESTO_VERSION_NAME), TEST_SERVER_VERSION);
            assertEquals(partition.getParameters().get(PRESTO_QUERY_ID_NAME), queryId);
        }

        // load the new table
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());

        // verify the data
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
        assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());

        // test rollback
        existingFiles = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
        assertFalse(existingFiles.isEmpty());

        // test statistics
        for (String partitionName : partitionNames) {
            HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(session, transaction, tableName, partitionName);
            assertEquals(partitionStatistics.getRowCount().getAsLong(), 1L);
            assertEquals(partitionStatistics.getFileCount().getAsLong(), 1L);
            assertGreaterThan(partitionStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
            assertGreaterThan(partitionStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
        }
    }

    Path stagingPathRoot;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        metadata.beginQuery(session);

        // "stage" insert data
        ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
        stagingPathRoot = getStagingPathRoot(insertTableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
        sink.appendPage(CREATE_TABLE_PARTITIONED_DATA_2ND.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());

        // verify all temp files start with the unique prefix
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
        Set<String> tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle));
        assertFalse(tempFiles.isEmpty());
        for (String filePath : tempFiles) {
            assertThat(new Path(filePath).getName()).startsWith(session.getQueryId());
        }

        // rollback insert
        transaction.rollback();
    }

    // verify the data is unchanged
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());

        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);

        // verify temp directory is empty
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName());
        assertTrue(listAllDataFiles(context, stagingPathRoot).isEmpty());
    }
}
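The listAllDataFiles helper used throughout this test belongs to the test harness. A hypothetical version of what such a recursive listing can look like with the Hadoop FileSystem API; the class and method names here are assumptions for illustration, not hetu-core's code:

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public final class DataFileLister
{
    private DataFileLister() {}

    // Recursively collect every file path under root; hidden files
    // (dot- and underscore-prefixed) are skipped, mirroring Hive conventions
    public static Set<String> listAllDataFiles(FileSystem fileSystem, Path root)
            throws IOException
    {
        Set<String> files = new HashSet<>();
        if (!fileSystem.exists(root)) {
            return files;
        }
        RemoteIterator<LocatedFileStatus> iterator = fileSystem.listFiles(root, true);
        while (iterator.hasNext()) {
            LocatedFileStatus status = iterator.next();
            String name = status.getPath().getName();
            if (!name.startsWith(".") && !name.startsWith("_")) {
                files.add(status.getPath().toString());
            }
        }
        return files;
    }
}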
Use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.
The class AbstractTestHive, method eraseStatistics:
private void eraseStatistics(SchemaTableName schemaTableName)
{
    HiveMetastore hiveMetastoreClient = getMetastoreClient();
    HiveIdentity identity = new HiveIdentity(SESSION);
    hiveMetastoreClient.updateTableStatistics(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName(), statistics -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()));

    Table table = hiveMetastoreClient.getTable(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(schemaTableName));
    List<String> partitionColumns = table.getPartitionColumns().stream()
            .map(Column::getName)
            .collect(toImmutableList());
    if (!table.getPartitionColumns().isEmpty()) {
        List<String> partitionNames = hiveMetastoreClient.getPartitionNames(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName())
                .orElse(ImmutableList.of());
        List<Partition> partitions = hiveMetastoreClient
                .getPartitionsByNames(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName(), partitionNames)
                .entrySet().stream()
                .map(Map.Entry::getValue)
                .filter(Optional::isPresent)
                .map(Optional::get)
                .collect(toImmutableList());
        for (Partition partition : partitions) {
            hiveMetastoreClient.updatePartitionStatistics(identity, schemaTableName.getSchemaName(), schemaTableName.getTableName(), makePartName(partitionColumns, partition.getValues()), statistics -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()));
        }
    }
}
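makePartName reassembles a partition name from column names and values, the inverse of toPartitionValues shown earlier. A toy equivalent (real implementations, such as Hive's FileUtils.makePartName, also escape special characters in the values):

import java.util.List;
import java.util.StringJoiner;

import com.google.common.collect.ImmutableList;

public class MakePartNameExample
{
    // Toy stand-in: real code also escapes '/', '=', and other special
    // characters appearing in the values
    static String makePartName(List<String> columns, List<String> values)
    {
        StringJoiner joiner = new StringJoiner("/");
        for (int i = 0; i < columns.size(); i++) {
            joiner.add(columns.get(i) + "=" + values.get(i));
        }
        return joiner.toString();
    }

    public static void main(String[] args)
    {
        // Prints ds=2020-01-01/country=US
        System.out.println(makePartName(ImmutableList.of("ds", "country"), ImmutableList.of("2020-01-01", "US")));
    }
}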