Use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.
Class HiveSplitManager, method getSplits:
@Override
public ConnectorSplitSource getSplits(
        ConnectorTransactionHandle transaction,
        ConnectorSession session,
        ConnectorTableHandle tableHandle,
        SplitSchedulingStrategy splitSchedulingStrategy,
        Supplier<List<Set<DynamicFilter>>> dynamicFilterSupplier,
        Optional<QueryType> queryType,
        Map<String, Object> queryInfo,
        Set<TupleDomain<ColumnMetadata>> userDefinedCachePredicates,
        boolean partOfReuse)
{
    HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveTable.getSchemaTableName();

    // get table metadata
    SemiTransactionalHiveMetastore metastore = metastoreProvider.apply((HiveTransactionHandle) transaction);
    Table table = metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));
    if (table.getStorage().getStorageFormat().getInputFormat().contains("carbon")) {
        throw new PrestoException(NOT_SUPPORTED, "Hive connector can't read carbondata tables");
    }

    // verify table is not marked as non-readable
    String tableNotReadable = table.getParameters().get(OBJECT_NOT_READABLE);
    if (!isNullOrEmpty(tableNotReadable)) {
        throw new HiveNotReadableException(tableName, Optional.empty(), tableNotReadable);
    }

    // get partitions
    List<HivePartition> partitions = partitionManager.getOrLoadPartitions(session, metastore, new HiveIdentity(session), hiveTable);

    // short circuit if we don't have any partitions
    if (partitions.isEmpty()) {
        return new FixedSplitSource(ImmutableList.of());
    }

    // get buckets from first partition (arbitrary)
    Optional<HiveBucketing.HiveBucketFilter> bucketFilter = hiveTable.getBucketFilter();

    // validate bucketed execution
    Optional<HiveBucketHandle> bucketHandle = hiveTable.getBucketHandle();
    if ((splitSchedulingStrategy == GROUPED_SCHEDULING) && !bucketHandle.isPresent()) {
        throw new PrestoException(GENERIC_INTERNAL_ERROR, "SchedulingPolicy is bucketed, but BucketHandle is not present");
    }

    // sort partitions in reverse order of partition id
    partitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(partitions);

    Iterable<HivePartitionMetadata> hivePartitions = getPartitionMetadata(session, metastore, table, tableName, partitions,
            bucketHandle.map(HiveBucketHandle::toTableBucketProperty));

    HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(
            table,
            hivePartitions,
            hiveTable.getCompactEffectivePredicate(),
            BackgroundHiveSplitLoader.BucketSplitInfo.createBucketSplitInfo(bucketHandle, bucketFilter),
            session,
            hdfsEnvironment,
            namenodeStats,
            directoryLister,
            executor,
            splitLoaderConcurrency,
            recursiveDfsWalkerEnabled,
            metastore.getValidWriteIds(session, hiveTable, queryType.map(t -> t == QueryType.VACUUM).orElse(false))
                    .map(validTxnWriteIdList -> validTxnWriteIdList.getTableValidWriteIdList(table.getDatabaseName() + "." + table.getTableName())),
            dynamicFilterSupplier,
            queryType,
            queryInfo,
            typeManager);

    HiveSplitSource splitSource;
    HiveStorageFormat hiveStorageFormat = HiveMetadata.extractHiveStorageFormat(table);
    switch (splitSchedulingStrategy) {
        case UNGROUPED_SCHEDULING:
            // For reuse, we should make sure to have the same split size at all times for a table.
            splitSource = HiveSplitSource.allAtOnce(session, table.getDatabaseName(), table.getTableName(),
                    partOfReuse ? 0 : maxInitialSplits, maxOutstandingSplits, maxOutstandingSplitsSize, maxSplitsPerSecond,
                    hiveSplitLoader, executor, new CounterStat(), dynamicFilterSupplier, userDefinedCachePredicates,
                    typeManager, hiveConfig, hiveStorageFormat);
            break;
        case GROUPED_SCHEDULING:
            // For reuse, we should make sure to have the same split size at all times for a table.
            splitSource = HiveSplitSource.bucketed(session, table.getDatabaseName(), table.getTableName(),
                    partOfReuse ? 0 : maxInitialSplits, maxOutstandingSplits, maxOutstandingSplitsSize, maxSplitsPerSecond,
                    hiveSplitLoader, executor, new CounterStat(), dynamicFilterSupplier, userDefinedCachePredicates,
                    typeManager, hiveConfig, hiveStorageFormat);
            break;
        default:
            throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingStrategy);
    }
    hiveSplitLoader.start(splitSource);

    if (queryType.isPresent() && queryType.get() == QueryType.VACUUM) {
        HdfsContext hdfsContext = new HdfsContext(session, table.getDatabaseName(), table.getTableName());
        return new HiveVacuumSplitSource(splitSource, (HiveVacuumTableHandle) queryInfo.get("vacuumHandle"), hdfsEnvironment, hdfsContext, session);
    }
    return splitSource;
}
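The partition sort above uses Guava's Ordering to order partitions by partition id in reverse natural order, presumably so that lexicographically largest (typically most recent, for date-style ids) partitions are processed first. A minimal, self-contained sketch of the same pattern, using plain strings in place of HivePartition (the partition ids are illustrative):

import com.google.common.collect.Ordering;
import java.util.Arrays;
import java.util.List;

public class ReverseSortExample
{
    public static void main(String[] args)
    {
        List<String> partitionIds = Arrays.asList("ds=2021-01-01", "ds=2021-03-01", "ds=2021-02-01");
        // Reverse natural (lexicographic) order: newest date-style partition id first.
        List<String> sorted = Ordering.natural().reverse().sortedCopy(partitionIds);
        System.out.println(sorted); // [ds=2021-03-01, ds=2021-02-01, ds=2021-01-01]
    }
}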
Use of io.prestosql.plugin.hive.metastore.Partition in project boostkit-bigdata by kunpengcompute.
Class GlueHiveMetastore, method batchGetPartition:
private List<Partition> batchGetPartition(String databaseName, String tableName, List<String> partitionNames)
{
    try {
        // Convert each partition name (e.g. "a=1/b=2") into a Glue PartitionValueList.
        List<PartitionValueList> partitionValueLists = partitionNames.stream()
                .map(partitionName -> new PartitionValueList().withValues(toPartitionValues(partitionName)))
                .collect(toList());

        // Glue limits how many partitions a single BatchGetPartition call may request,
        // so issue one asynchronous request per page.
        List<List<PartitionValueList>> batchedPartitionValueLists = Lists.partition(partitionValueLists, BATCH_GET_PARTITION_MAX_PAGE_SIZE);
        List<Future<BatchGetPartitionResult>> batchGetPartitionFutures = new ArrayList<>();
        List<Partition> result = new ArrayList<>();
        for (List<PartitionValueList> partitions : batchedPartitionValueLists) {
            batchGetPartitionFutures.add(glueClient.batchGetPartitionAsync(new BatchGetPartitionRequest()
                    .withCatalogId(catalogId)
                    .withDatabaseName(databaseName)
                    .withTableName(tableName)
                    .withPartitionsToGet(partitions)));
        }

        // Wait for each page and convert the Glue partitions to Presto partitions.
        for (Future<BatchGetPartitionResult> future : batchGetPartitionFutures) {
            future.get().getPartitions().forEach(partition -> result.add(GlueToPrestoConverter.convertPartition(partition)));
        }
        return result;
    }
    catch (AmazonServiceException | InterruptedException | ExecutionException e) {
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, e);
    }
}
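The paging here relies on Guava's Lists.partition, which splits a list into consecutive sublists of a fixed maximum size. A minimal sketch of that batching pattern (the page size of 3 is illustrative; the method above uses its BATCH_GET_PARTITION_MAX_PAGE_SIZE constant):

import com.google.common.collect.Lists;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class BatchingExample
{
    public static void main(String[] args)
    {
        List<Integer> items = IntStream.rangeClosed(1, 10).boxed().collect(Collectors.toList());
        // Split into pages of at most 3 elements; the last page may be shorter.
        List<List<Integer>> pages = Lists.partition(items, 3);
        pages.forEach(System.out::println); // [1, 2, 3] [4, 5, 6] [7, 8, 9] [10]
    }
}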
Use of io.prestosql.plugin.hive.metastore.Partition in project boostkit-bigdata by kunpengcompute.
Class GlueHiveMetastore, method getPartitionsByNames:
/**
 * <pre>
 * Ex: Partition keys = ['a', 'b']
 *     Partition names = ['a=1/b=2', 'a=2/b=2']
 * </pre>
 *
 * @param partitionNames List of full partition names
 * @return Mapping of partition name to partition object
 */
@Override
public Map<String, Optional<Partition>> getPartitionsByNames(HiveIdentity identity, String databaseName, String tableName, List<String> partitionNames)
{
    requireNonNull(partitionNames, "partitionNames is null");
    if (partitionNames.isEmpty()) {
        return ImmutableMap.of();
    }

    List<Partition> partitions = batchGetPartition(databaseName, tableName, partitionNames);

    // Index the requested names by their partition values, then look up each fetched
    // partition by its values so the result preserves the caller's names.
    Map<String, List<String>> partitionNameToPartitionValuesMap = partitionNames.stream()
            .collect(toMap(identity(), HiveUtil::toPartitionValues));
    Map<List<String>, Partition> partitionValuesToPartitionMap = partitions.stream()
            .collect(toMap(Partition::getValues, identity()));

    ImmutableMap.Builder<String, Optional<Partition>> resultBuilder = ImmutableMap.builder();
    for (Entry<String, List<String>> entry : partitionNameToPartitionValuesMap.entrySet()) {
        // Names with no matching partition map to Optional.empty().
        Partition partition = partitionValuesToPartitionMap.get(entry.getValue());
        resultBuilder.put(entry.getKey(), Optional.ofNullable(partition));
    }
    return resultBuilder.build();
}
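The name-to-values mapping hinges on HiveUtil.toPartitionValues, which parses a full partition name such as 'a=1/b=2' into its value list ['1', '2']. A simplified, self-contained sketch of that parsing (an illustration only: the real helper also handles escaped characters, which this stand-in skips):

import java.util.ArrayList;
import java.util.List;

public class PartitionNameExample
{
    // Simplified stand-in for HiveUtil.toPartitionValues: split "key=value" pairs
    // on '/' and keep only the values.
    static List<String> toPartitionValues(String partitionName)
    {
        List<String> values = new ArrayList<>();
        for (String pair : partitionName.split("/")) {
            values.add(pair.substring(pair.indexOf('=') + 1));
        }
        return values;
    }

    public static void main(String[] args)
    {
        System.out.println(toPartitionValues("a=1/b=2")); // [1, 2]
    }
}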
Use of io.prestosql.plugin.hive.metastore.Partition in project boostkit-bigdata by kunpengcompute.
Class GlueHiveMetastore, method getPartitions:
private List<Partition> getPartitions(String databaseName, String tableName, String expression)
{
    try {
        List<Partition> partitions = new ArrayList<>();
        String nextToken = null;
        // Glue returns partitions in pages; keep requesting until no continuation token remains.
        do {
            GetPartitionsResult result = glueClient.getPartitions(new GetPartitionsRequest()
                    .withCatalogId(catalogId)
                    .withDatabaseName(databaseName)
                    .withTableName(tableName)
                    .withExpression(expression)
                    .withNextToken(nextToken));
            result.getPartitions().forEach(partition -> partitions.add(GlueToPrestoConverter.convertPartition(partition)));
            nextToken = result.getNextToken();
        } while (nextToken != null);
        return partitions;
    }
    catch (AmazonServiceException e) {
        throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, e);
    }
}
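The do/while loop is the standard token-based pagination pattern for AWS list APIs: pass null on the first call, then echo back each response's token until it comes back null. A minimal sketch of the pattern against a hypothetical paged client (PagedClient, Page, and fetchPage are illustrative stand-ins, not Glue APIs):

import java.util.ArrayList;
import java.util.List;

public class PaginationExample
{
    // Hypothetical one-page response: some items plus an optional continuation token.
    static class Page
    {
        final List<String> items;
        final String nextToken; // null on the last page

        Page(List<String> items, String nextToken)
        {
            this.items = items;
            this.nextToken = nextToken;
        }
    }

    interface PagedClient
    {
        Page fetchPage(String nextToken); // nextToken == null requests the first page
    }

    static List<String> fetchAll(PagedClient client)
    {
        List<String> all = new ArrayList<>();
        String nextToken = null;
        do {
            Page page = client.fetchPage(nextToken);
            all.addAll(page.items);
            nextToken = page.nextToken;
        } while (nextToken != null);
        return all;
    }
}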
Use of io.prestosql.plugin.hive.metastore.Partition in project boostkit-bigdata by kunpengcompute.
Class GlueHiveMetastore, method dropPartition:
@Override
public void dropPartition(HiveIdentity identity, String databaseName, String tableName, List<String> parts, boolean deleteData)
{
    Table table = getTableOrElseThrow(identity, databaseName, tableName);
    // Fetch the partition first so its storage location is still known after the metadata is deleted.
    Partition partition = getPartition(identity, databaseName, tableName, parts)
            .orElseThrow(() -> new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), parts));

    try {
        glueClient.deletePartition(new DeletePartitionRequest()
                .withCatalogId(catalogId)
                .withDatabaseName(databaseName)
                .withTableName(tableName)
                .withPartitionValues(parts));
    }
    catch (AmazonServiceException e) {
        throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, e);
    }

    // Only managed tables own their data, so only then is the partition directory removed.
    String partLocation = partition.getStorage().getLocation();
    if (deleteData && isManagedTable(table) && !isNullOrEmpty(partLocation)) {
        deleteDir(hdfsContext, hdfsEnvironment, new Path(partLocation), true);
    }
}
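The deleteData branch removes files only for managed tables: an external table merely references data owned elsewhere, so dropping one of its partitions must not destroy the underlying files. A plausible sketch of the isManagedTable check used above (an assumption about its implementation; the actual helper may differ):

// Assumed helper: Hive marks tables it owns with the MANAGED_TABLE table type,
// so comparing the type string is one common way to implement this check.
private static boolean isManagedTable(Table table)
{
    return "MANAGED_TABLE".equals(table.getTableType());
}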