use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.
the class TestBackgroundHiveSplitLoader method testPartitionedTableWithDynamicFilter.
@Test
public void testPartitionedTableWithDynamicFilter() throws Exception {
TypeManager typeManager = new TestingTypeManager();
List<HivePartitionMetadata> hivePartitionMetadatas = ImmutableList.of(new HivePartitionMetadata(new HivePartition(new SchemaTableName("testSchema", "table_name")), Optional.of(new Partition("testSchema", "table_name", ImmutableList.of("1"), TABLE_STORAGE, ImmutableList.of(TABLE_COLUMN), ImmutableMap.of("param", "value"))), ImmutableMap.of()));
ConnectorSession connectorSession = new TestingConnectorSession(new HiveSessionProperties(new HiveConfig().setMaxSplitSize(new DataSize(1.0, GIGABYTE)).setDynamicFilterPartitionFilteringEnabled(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
BackgroundHiveSplitLoader backgroundHiveSplitLoader = new BackgroundHiveSplitLoader(PARTITIONED_TABLE, hivePartitionMetadatas, TupleDomain.all(), BackgroundHiveSplitLoader.BucketSplitInfo.createBucketSplitInfo(Optional.empty(), Optional.empty()), connectorSession, new TestingHdfsEnvironment(TEST_FILES), new NamenodeStats(), new CachingDirectoryLister(new HiveConfig()), directExecutor(), 2, false, Optional.empty(), createTestDynamicFilterSupplier("partitionColumn", ImmutableList.of(0L, 2L, 3L)), Optional.empty(), ImmutableMap.of(), typeManager);
HiveSplitSource hiveSplitSource = hiveSplitSource(backgroundHiveSplitLoader);
backgroundHiveSplitLoader.start(hiveSplitSource);
List<HiveSplit> splits = drainSplits(hiveSplitSource);
assertEquals(splits.size(), 0, "Splits should be filtered");
}
use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.
the class AbstractTestHive method listAllDataPaths.
public static List<String> listAllDataPaths(HiveIdentity identity, SemiTransactionalHiveMetastore metastore, String schemaName, String tableName) {
ImmutableList.Builder<String> locations = ImmutableList.builder();
Table table = metastore.getTable(identity, schemaName, tableName).get();
if (table.getStorage().getLocation() != null) {
// For partitioned table, there should be nothing directly under this directory.
// But including this location in the set makes the directory content assert more
// extensive, which is desirable.
locations.add(table.getStorage().getLocation());
}
Optional<List<String>> partitionNames = metastore.getPartitionNames(identity, schemaName, tableName);
if (partitionNames.isPresent()) {
metastore.getPartitionsByNames(identity, schemaName, tableName, partitionNames.get()).values().stream().map(Optional::get).map(partition -> partition.getStorage().getLocation()).filter(location -> !location.startsWith(table.getStorage().getLocation())).forEach(locations::add);
}
return locations.build();
}
use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.
the class AbstractTestHive method testStorePartitionWithStatistics.
protected void testStorePartitionWithStatistics(List<ColumnMetadata> columns, PartitionStatistics statsForAllColumns1, PartitionStatistics statsForAllColumns2, PartitionStatistics statsForSubsetOfColumns, PartitionStatistics emptyStatistics) throws Exception {
SchemaTableName tableName = temporaryTable("store_partition_with_statistics");
try {
doCreateEmptyTable(tableName, ORC, columns);
HiveMetastoreClosure hiveMetastoreClient = new HiveMetastoreClosure(getMetastoreClient());
HiveIdentity identity = new HiveIdentity(SESSION);
Table table = hiveMetastoreClient.getTable(identity, tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
List<String> partitionValues = ImmutableList.of("2016-01-01");
String partitionName = makePartName(ImmutableList.of("ds"), partitionValues);
Partition partition = createDummyPartition(table, partitionName);
// create partition with stats for all columns
hiveMetastoreClient.addPartitions(identity, tableName.getSchemaName(), tableName.getTableName(), ImmutableList.of(new PartitionWithStatistics(partition, partitionName, statsForAllColumns1)));
assertEquals(hiveMetastoreClient.getPartition(identity, tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat(), StorageFormat.fromHiveStorageFormat(ORC));
assertThat(hiveMetastoreClient.getPartitionStatistics(identity, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns1));
// alter the partition into one with other stats
Partition modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(StorageFormat.fromHiveStorageFormat(RCBINARY)).setLocation(partitionTargetPath(tableName, partitionName))).build();
hiveMetastoreClient.alterPartition(identity, tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForAllColumns2));
assertEquals(hiveMetastoreClient.getPartition(identity, tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat(), StorageFormat.fromHiveStorageFormat(RCBINARY));
assertThat(hiveMetastoreClient.getPartitionStatistics(identity, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns2));
// alter the partition into one with stats for only subset of columns
modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(StorageFormat.fromHiveStorageFormat(TEXTFILE)).setLocation(partitionTargetPath(tableName, partitionName))).build();
hiveMetastoreClient.alterPartition(identity, tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForSubsetOfColumns));
assertThat(hiveMetastoreClient.getPartitionStatistics(identity, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForSubsetOfColumns));
// alter the partition into one without stats
modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(StorageFormat.fromHiveStorageFormat(TEXTFILE)).setLocation(partitionTargetPath(tableName, partitionName))).build();
hiveMetastoreClient.alterPartition(identity, tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, emptyStatistics));
assertThat(hiveMetastoreClient.getPartitionStatistics(identity, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, emptyStatistics));
} finally {
dropTable(tableName);
}
}
use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.
the class GlueHiveMetastore method updatePartitionStatistics.
@Override
public void updatePartitionStatistics(HiveIdentity identity, String databaseName, String tableName, String partitionName, Function<PartitionStatistics, PartitionStatistics> update) {
List<String> partitionValues = toPartitionValues(partitionName);
Partition partition = getPartition(identity, databaseName, tableName, partitionValues).orElseThrow(() -> new PrestoException(HIVE_PARTITION_DROPPED_DURING_QUERY, "Statistics result does not contain entry for partition: " + partitionName));
PartitionStatistics currentStatistics = getPartitionStatistics(partition);
PartitionStatistics updatedStatistics = update.apply(currentStatistics);
if (!updatedStatistics.getColumnStatistics().isEmpty()) {
throw new PrestoException(NOT_SUPPORTED, "Glue metastore does not support column level statistics");
}
try {
PartitionInput partitionInput = GlueInputConverter.convertPartition(partition);
partitionInput.setParameters(ThriftMetastoreUtil.updateStatisticsParameters(partition.getParameters(), updatedStatistics.getBasicStatistics()));
glueClient.updatePartition(new UpdatePartitionRequest().withCatalogId(catalogId).withDatabaseName(databaseName).withTableName(tableName).withPartitionValueList(partition.getValues()).withPartitionInput(partitionInput));
} catch (EntityNotFoundException e) {
throw new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), partitionValues);
} catch (AmazonServiceException e) {
throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, e);
}
}
use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.
the class GlueHiveMetastore method getPartitionsByNames.
/**
* <pre>
* Ex: Partition keys = ['a', 'b']
* Partition names = ['a=1/b=2', 'a=2/b=2']
* </pre>
*
* @param partitionNames List of full partition names
* @return Mapping of partition name to partition object
*/
@Override
public Map<String, Optional<Partition>> getPartitionsByNames(HiveIdentity identity, String databaseName, String tableName, List<String> partitionNames) {
requireNonNull(partitionNames, "partitionNames is null");
if (partitionNames.isEmpty()) {
return ImmutableMap.of();
}
List<Partition> partitions = batchGetPartition(databaseName, tableName, partitionNames);
Map<String, List<String>> partitionNameToPartitionValuesMap = partitionNames.stream().collect(toMap(identity(), HiveUtil::toPartitionValues));
Map<List<String>, Partition> partitionValuesToPartitionMap = partitions.stream().collect(toMap(Partition::getValues, identity()));
ImmutableMap.Builder<String, Optional<Partition>> resultBuilder = ImmutableMap.builder();
for (Entry<String, List<String>> entry : partitionNameToPartitionValuesMap.entrySet()) {
Partition partition = partitionValuesToPartitionMap.get(entry.getValue());
resultBuilder.put(entry.getKey(), Optional.ofNullable(partition));
}
return resultBuilder.build();
}
Aggregations