use of io.prestosql.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.GROUPED_SCHEDULING in project hetu-core by openlookeng.
the class HiveSplitManager method getSplits.
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy, Supplier<List<Set<DynamicFilter>>> dynamicFilterSupplier, Optional<QueryType> queryType, Map<String, Object> queryInfo, Set<TupleDomain<ColumnMetadata>> userDefinedCachePredicates, boolean partOfReuse) {
HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
SchemaTableName tableName = hiveTable.getSchemaTableName();
// get table metadata
SemiTransactionalHiveMetastore metastore = metastoreProvider.apply((HiveTransactionHandle) transaction);
Table table = metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
if (table.getStorage().getStorageFormat().getInputFormat().contains("carbon")) {
throw new PrestoException(NOT_SUPPORTED, "Hive connector can't read carbondata tables");
}
// verify table is not marked as non-readable
String tableNotReadable = table.getParameters().get(OBJECT_NOT_READABLE);
if (!isNullOrEmpty(tableNotReadable)) {
throw new HiveNotReadableException(tableName, Optional.empty(), tableNotReadable);
}
// get partitions
List<HivePartition> partitions = partitionManager.getOrLoadPartitions(session, metastore, new HiveIdentity(session), hiveTable);
// short circuit if we don't have any partitions
if (partitions.isEmpty()) {
return new FixedSplitSource(ImmutableList.of());
}
// get buckets from first partition (arbitrary)
Optional<HiveBucketing.HiveBucketFilter> bucketFilter = hiveTable.getBucketFilter();
// validate bucket bucketed execution
Optional<HiveBucketHandle> bucketHandle = hiveTable.getBucketHandle();
if ((splitSchedulingStrategy == GROUPED_SCHEDULING) && !bucketHandle.isPresent()) {
throw new PrestoException(GENERIC_INTERNAL_ERROR, "SchedulingPolicy is bucketed, but BucketHandle is not present");
}
// sort partitions
partitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(partitions);
Iterable<HivePartitionMetadata> hivePartitions = getPartitionMetadata(session, metastore, table, tableName, partitions, bucketHandle.map(HiveBucketHandle::toTableBucketProperty));
HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(table, hivePartitions, hiveTable.getCompactEffectivePredicate(), BackgroundHiveSplitLoader.BucketSplitInfo.createBucketSplitInfo(bucketHandle, bucketFilter), session, hdfsEnvironment, namenodeStats, directoryLister, executor, splitLoaderConcurrency, recursiveDfsWalkerEnabled, metastore.getValidWriteIds(session, hiveTable, queryType.map(t -> t == QueryType.VACUUM).orElse(false)).map(validTxnWriteIdList -> validTxnWriteIdList.getTableValidWriteIdList(table.getDatabaseName() + "." + table.getTableName())), dynamicFilterSupplier, queryType, queryInfo, typeManager);
HiveSplitSource splitSource;
HiveStorageFormat hiveStorageFormat = HiveMetadata.extractHiveStorageFormat(table);
switch(splitSchedulingStrategy) {
case UNGROUPED_SCHEDULING:
splitSource = HiveSplitSource.allAtOnce(session, table.getDatabaseName(), table.getTableName(), // For reuse, we should make sure to have same split size all time for a table.
partOfReuse ? 0 : maxInitialSplits, maxOutstandingSplits, maxOutstandingSplitsSize, maxSplitsPerSecond, hiveSplitLoader, executor, new CounterStat(), dynamicFilterSupplier, userDefinedCachePredicates, typeManager, hiveConfig, hiveStorageFormat);
break;
case GROUPED_SCHEDULING:
splitSource = HiveSplitSource.bucketed(session, table.getDatabaseName(), table.getTableName(), // For reuse, we should make sure to have same split size all time for a table.
partOfReuse ? 0 : maxInitialSplits, maxOutstandingSplits, maxOutstandingSplitsSize, maxSplitsPerSecond, hiveSplitLoader, executor, new CounterStat(), dynamicFilterSupplier, userDefinedCachePredicates, typeManager, hiveConfig, hiveStorageFormat);
break;
default:
throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingStrategy);
}
hiveSplitLoader.start(splitSource);
if (queryType.isPresent() && queryType.get() == QueryType.VACUUM) {
HdfsContext hdfsContext = new HdfsContext(session, table.getDatabaseName(), table.getTableName());
return new HiveVacuumSplitSource(splitSource, (HiveVacuumTableHandle) queryInfo.get("vacuumHandle"), hdfsEnvironment, hdfsContext, session);
}
return splitSource;
}
use of io.prestosql.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.GROUPED_SCHEDULING in project boostkit-bigdata by kunpengcompute.
the class HiveSplitManager method getSplits.
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy, Supplier<List<Set<DynamicFilter>>> dynamicFilterSupplier, Optional<QueryType> queryType, Map<String, Object> queryInfo, Set<TupleDomain<ColumnMetadata>> userDefinedCachePredicates, boolean partOfReuse) {
HiveTableHandle hiveTable = (HiveTableHandle) tableHandle;
SchemaTableName tableName = hiveTable.getSchemaTableName();
// get table metadata
SemiTransactionalHiveMetastore metastore = metastoreProvider.apply((HiveTransactionHandle) transaction);
Table table = metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
if (table.getStorage().getStorageFormat().getInputFormat().contains("carbon")) {
throw new PrestoException(NOT_SUPPORTED, "Hive connector can't read carbondata tables");
}
// verify table is not marked as non-readable
String tableNotReadable = table.getParameters().get(OBJECT_NOT_READABLE);
if (!isNullOrEmpty(tableNotReadable)) {
throw new HiveNotReadableException(tableName, Optional.empty(), tableNotReadable);
}
// get partitions
List<HivePartition> partitions = partitionManager.getOrLoadPartitions(session, metastore, new HiveIdentity(session), hiveTable);
// short circuit if we don't have any partitions
if (partitions.isEmpty()) {
return new FixedSplitSource(ImmutableList.of());
}
// get buckets from first partition (arbitrary)
Optional<HiveBucketing.HiveBucketFilter> bucketFilter = hiveTable.getBucketFilter();
// validate bucket bucketed execution
Optional<HiveBucketHandle> bucketHandle = hiveTable.getBucketHandle();
if ((splitSchedulingStrategy == GROUPED_SCHEDULING) && !bucketHandle.isPresent()) {
throw new PrestoException(GENERIC_INTERNAL_ERROR, "SchedulingPolicy is bucketed, but BucketHandle is not present");
}
// sort partitions
partitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(partitions);
Iterable<HivePartitionMetadata> hivePartitions = getPartitionMetadata(session, metastore, table, tableName, partitions, bucketHandle.map(HiveBucketHandle::toTableBucketProperty));
HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(table, hivePartitions, hiveTable.getCompactEffectivePredicate(), BackgroundHiveSplitLoader.BucketSplitInfo.createBucketSplitInfo(bucketHandle, bucketFilter), session, hdfsEnvironment, namenodeStats, directoryLister, executor, splitLoaderConcurrency, recursiveDfsWalkerEnabled, metastore.getValidWriteIds(session, hiveTable, queryType.map(t -> t == QueryType.VACUUM).orElse(false)).map(validTxnWriteIdList -> validTxnWriteIdList.getTableValidWriteIdList(table.getDatabaseName() + "." + table.getTableName())), dynamicFilterSupplier, queryType, queryInfo, typeManager);
HiveSplitSource splitSource;
HiveStorageFormat hiveStorageFormat = HiveMetadata.extractHiveStorageFormat(table);
switch(splitSchedulingStrategy) {
case UNGROUPED_SCHEDULING:
splitSource = HiveSplitSource.allAtOnce(session, table.getDatabaseName(), table.getTableName(), // For reuse, we should make sure to have same split size all time for a table.
partOfReuse ? 0 : maxInitialSplits, maxOutstandingSplits, maxOutstandingSplitsSize, maxSplitsPerSecond, hiveSplitLoader, executor, new CounterStat(), dynamicFilterSupplier, userDefinedCachePredicates, typeManager, hiveConfig, hiveStorageFormat);
break;
case GROUPED_SCHEDULING:
splitSource = HiveSplitSource.bucketed(session, table.getDatabaseName(), table.getTableName(), // For reuse, we should make sure to have same split size all time for a table.
partOfReuse ? 0 : maxInitialSplits, maxOutstandingSplits, maxOutstandingSplitsSize, maxSplitsPerSecond, hiveSplitLoader, executor, new CounterStat(), dynamicFilterSupplier, userDefinedCachePredicates, typeManager, hiveConfig, hiveStorageFormat);
break;
default:
throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingStrategy);
}
hiveSplitLoader.start(splitSource);
if (queryType.isPresent() && queryType.get() == QueryType.VACUUM) {
HdfsContext hdfsContext = new HdfsContext(session, table.getDatabaseName(), table.getTableName());
return new HiveVacuumSplitSource(splitSource, (HiveVacuumTableHandle) queryInfo.get("vacuumHandle"), hdfsEnvironment, hdfsContext, session);
}
return splitSource;
}
Aggregations