Use of io.prestosql.spi.connector.Constraint in project boostkit-bigdata by kunpengcompute.
The class AbstractTestHive, method testGetPartitionsWithBindings.
@Test
public void testGetPartitionsWithBindings() {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat);
        // bind the regular (non-partition) integer column to a single value and push the constraint into the handle
        Constraint constraint = new Constraint(TupleDomain.withColumnDomains(
                ImmutableMap.of(intColumn, Domain.singleValue(BIGINT, 5L))));
        tableHandle = applyFilter(metadata, tableHandle, constraint);
        ConnectorTableProperties properties = metadata.getTableProperties(newSession(), tableHandle);
        // binding a value on a regular column should not change the table properties or the partition list
        assertExpectedTableProperties(properties, tablePartitionFormatProperties);
        assertExpectedPartitions(tableHandle, tablePartitionFormatPartitions);
    }
}
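For readers unfamiliar with the SPI types used above, here is a minimal, self-contained sketch of how a summary-only Constraint is built from a TupleDomain. The TestingColumnHandle class is a hypothetical stand-in introduced for illustration; real connectors supply their own ColumnHandle implementations such as HiveColumnHandle.

import com.google.common.collect.ImmutableMap;
import io.prestosql.spi.connector.ColumnHandle;
import io.prestosql.spi.connector.Constraint;
import io.prestosql.spi.predicate.Domain;
import io.prestosql.spi.predicate.TupleDomain;

import static io.prestosql.spi.type.BigintType.BIGINT;

public class ConstraintSketch {
    // hypothetical minimal handle; real connectors define richer implementations
    private static final class TestingColumnHandle implements ColumnHandle {
        private final String name;

        private TestingColumnHandle(String name) {
            this.name = name;
        }

        @Override
        public String toString() {
            return name;
        }
    }

    public static void main(String[] args) {
        ColumnHandle intColumn = new TestingColumnHandle("t_int");
        // summary-only constraint equivalent to "t_int = 5"
        Constraint constraint = new Constraint(TupleDomain.withColumnDomains(
                ImmutableMap.of(intColumn, Domain.singleValue(BIGINT, 5L))));
        // the summary is what connectors inspect in applyFilter/getTableStatistics
        System.out.println(constraint.getSummary());
    }
}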
Use of io.prestosql.spi.connector.Constraint in project boostkit-bigdata by kunpengcompute.
The class AbstractTestHive, method readTable.
private MaterializedResult readTable(
        Transaction transaction,
        ConnectorTableHandle connectorTableHandle,
        List<ColumnHandle> columnHandles,
        ConnectorSession session,
        TupleDomain<ColumnHandle> tupleDomain,
        OptionalInt expectedSplitCount,
        Optional<HiveStorageFormat> expectedStorageFormat)
        throws Exception {
    // wrap the tuple domain in a Constraint and push it into the table handle
    ConnectorTableHandle tableHandle = applyFilter(transaction.getMetadata(), connectorTableHandle, new Constraint(tupleDomain));
    List<ConnectorSplit> splits = getAllSplits(splitManager.getSplits(transaction.getTransactionHandle(), session, tableHandle, UNGROUPED_SCHEDULING));
    if (expectedSplitCount.isPresent()) {
        assertEquals(splits.size(), expectedSplitCount.getAsInt());
    }
    // read every split and collect the materialized rows
    ImmutableList.Builder<MaterializedRow> allRows = ImmutableList.builder();
    for (ConnectorSplit split : splits) {
        try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles)) {
            expectedStorageFormat.ifPresent(format -> assertPageSourceType(pageSource, format));
            MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
            allRows.addAll(result.getMaterializedRows());
        }
    }
    return new MaterializedResult(allRows.build(), getTypes(columnHandles));
}
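The getAllSplits helper is not shown in this excerpt. A plausible sketch, assuming the standard ConnectorSplitSource API, drains the split source batch by batch; the batch size of 1000 is an arbitrary illustration, not the project's actual value.

import static io.airlift.concurrent.MoreFutures.getFutureValue;
import static io.prestosql.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED;

// hedged sketch of the helper used above: drain a ConnectorSplitSource into a list
private static List<ConnectorSplit> getAllSplits(ConnectorSplitSource splitSource) {
    ImmutableList.Builder<ConnectorSplit> splits = ImmutableList.builder();
    while (!splitSource.isFinished()) {
        // 1000 is an arbitrary batch size for illustration
        splits.addAll(getFutureValue(splitSource.getNextBatch(NOT_PARTITIONED, 1000)).getSplits());
    }
    return splits.build();
}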
Use of io.prestosql.spi.connector.Constraint in project boostkit-bigdata by kunpengcompute.
The class HiveMetadata, method getPartitionsSystemTable.
private Optional<SystemTable> getPartitionsSystemTable(ConnectorSession session, SchemaTableName tableName, SchemaTableName sourceTableName) {
    HiveTableHandle sourceTableHandle = getTableHandle(session, sourceTableName);
    if (sourceTableHandle == null) {
        return Optional.empty();
    }
    SchemaTableName schemaTableName = sourceTableHandle.getSchemaTableName();
    Table table = metastore.getTable(new HiveIdentity(session), schemaTableName.getSchemaName(), schemaTableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(schemaTableName));
    List<HiveColumnHandle> partitionColumns = sourceTableHandle.getPartitionColumns();
    if (partitionColumns.isEmpty()) {
        return Optional.empty();
    }
    List<Type> partitionColumnTypes = partitionColumns.stream()
            .map(HiveColumnHandle::getTypeSignature)
            .map(typeManager::getType)
            .collect(toImmutableList());
    List<ColumnMetadata> partitionSystemTableColumns = partitionColumns.stream()
            .map(column -> new ColumnMetadata(column.getName(), typeManager.getType(column.getTypeSignature()), column.getComment().orElse(null), column.isHidden()))
            .collect(toImmutableList());
    // the system table's constraint is keyed by field index; map indexes back to column handles
    Map<Integer, HiveColumnHandle> fieldIdToColumnHandle = IntStream.range(0, partitionColumns.size())
            .boxed()
            .collect(toImmutableMap(identity(), partitionColumns::get));
    return Optional.of(createSystemTable(new ConnectorTableMetadata(tableName, partitionSystemTableColumns), constraint -> {
        // re-key the index-based TupleDomain to column handles, then rebuild a Constraint for the partition manager
        TupleDomain<ColumnHandle> targetTupleDomain = constraint.transform(fieldIdToColumnHandle::get);
        Predicate<Map<ColumnHandle, NullableValue>> targetPredicate = convertToPredicate(targetTupleDomain);
        Constraint targetConstraint = new Constraint(targetTupleDomain, targetPredicate);
        // one record per matching partition, with one value per partition column
        Iterable<List<Object>> records = () -> stream(partitionManager.getPartitions(metastore, new HiveIdentity(session), sourceTableHandle, targetConstraint, table).getPartitions())
                .map(hivePartition -> IntStream.range(0, partitionColumns.size())
                        .mapToObj(fieldIdToColumnHandle::get)
                        .map(columnHandle -> hivePartition.getKeys().get(columnHandle).getValue())
                        .collect(toList()))
                .iterator();
        return new InMemoryRecordSet(partitionColumnTypes, records).cursor();
    }));
}
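The key step above is constraint.transform, which re-keys a TupleDomain under a mapping function. A minimal sketch, reusing the hypothetical TestingColumnHandle from the first example:

// hedged sketch: re-key an index-based TupleDomain to column handles,
// as the partitions system table does before calling the partition manager
TupleDomain<Integer> byFieldIndex = TupleDomain.withColumnDomains(
        ImmutableMap.of(0, Domain.singleValue(BIGINT, 5L)));
Map<Integer, ColumnHandle> fieldIdToColumnHandle = ImmutableMap.of(0, new TestingColumnHandle("part_key"));
TupleDomain<ColumnHandle> byHandle = byFieldIndex.transform(fieldIdToColumnHandle::get);
// a Constraint can carry both the remapped summary and a row-level predicate
Constraint constraint = new Constraint(byHandle, bindings -> true);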
Use of io.prestosql.spi.connector.Constraint in project boostkit-bigdata by kunpengcompute.
The class HiveMetadata, method applyFilter.
@Override
public Optional<ConstraintApplicationResult<ConnectorTableHandle>> applyFilter(
        ConnectorSession session,
        ConnectorTableHandle tableHandle,
        Constraint constraint,
        List<Constraint> disjunctConstraints,
        Set<ColumnHandle> allColumnHandles,
        boolean pushPartitionsOnly) {
    HiveIdentity identity = new HiveIdentity(session);
    HiveTableHandle handle = (HiveTableHandle) tableHandle;
    checkArgument(!handle.getAnalyzePartitionValues().isPresent() || constraint.getSummary().isAll(),
            "Analyze should not have a constraint");
    SchemaTableName tableName = handle.getSchemaTableName();
    Table table = metastore.getTable(identity, tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));
    HivePartitionResult partitionResult = partitionManager.getPartitions(metastore, identity, handle, constraint, table);
    HiveTableHandle newHandle = partitionManager.applyPartitionResult(handle, partitionResult);
    // The goal here is to push down all the constraints/predicates to HivePageSourceProvider
    // in case some pre-filtering can be done using the heuristic index.
    // However, during scheduling we can't be sure a column will have a heuristic index,
    // so filtering must still be done by the filter operator; hence the unenforced
    // constraints below include all constraints (minus partition constraints).
    ImmutableMap.Builder<HiveColumnHandle, Domain> pushedDown = ImmutableMap.builder();
    pushedDown.putAll(partitionResult.getUnenforcedConstraint().getDomains().get().entrySet().stream()
            .collect(toMap(e -> (HiveColumnHandle) e.getKey(), Map.Entry::getValue)));
    TupleDomain<HiveColumnHandle> newEffectivePredicate = newHandle.getCompactEffectivePredicate()
            .intersect(handle.getCompactEffectivePredicate())
            .intersect(withColumnDomains(pushedDown.build()));
    // keep only the parts of each disjunct constraint not already covered by the effective predicate
    ImmutableList.Builder<TupleDomain<HiveColumnHandle>> builder = ImmutableList.builder();
    disjunctConstraints.forEach(c -> {
        TupleDomain<HiveColumnHandle> newSubDomain = withColumnDomains(c.getSummary().getDomains().get().entrySet().stream()
                .collect(toMap(e -> (HiveColumnHandle) e.getKey(), Map.Entry::getValue)))
                .subtract(newEffectivePredicate);
        if (!newSubDomain.isNone()) {
            builder.add(newSubDomain);
        }
    });
    // Collect the names of all columns involved in the predicate
    Set<String> predicateColumnNames = new HashSet<>();
    newEffectivePredicate.getDomains().get().keySet().stream()
            .map(HiveColumnHandle::getColumnName)
            .forEach(predicateColumnNames::add);
    List<TupleDomain<HiveColumnHandle>> newEffectivePredicates = null;
    boolean isSuitableToPush = false;
    if (HiveSessionProperties.isOrcPredicatePushdownEnabled(session)) {
        isSuitableToPush = checkIfSuitableToPush(allColumnHandles, tableHandle, session);
    }
    if (isSuitableToPush && HiveSessionProperties.isOrcDisjunctPredicatePushdownEnabled(session)) {
        newEffectivePredicates = builder.build();
        newEffectivePredicates.forEach(nfp -> nfp.getDomains().get().keySet().stream()
                .map(HiveColumnHandle::getColumnName)
                .forEach(predicateColumnNames::add));
    }
    if (isSuitableToPush && partitionResult.getEnforcedConstraint().equals(newEffectivePredicate) && (newEffectivePredicates == null || newEffectivePredicates.isEmpty())) {
        isSuitableToPush = false;
    }
    // Map predicate column names to regular (non-partition) Hive column handles
    Map<String, ColumnHandle> columnHandles = getColumnHandles(table);
    Map<String, HiveColumnHandle> predicateColumns = predicateColumnNames.stream()
            .map(columnHandles::get)
            .map(HiveColumnHandle.class::cast)
            .filter(HiveColumnHandle::isRegular)
            .collect(toImmutableMap(HiveColumnHandle::getName, identity()));
    newHandle = new HiveTableHandle(
            newHandle.getSchemaName(),
            newHandle.getTableName(),
            newHandle.getTableParameters(),
            newHandle.getPartitionColumns(),
            newHandle.getPartitions(),
            newEffectivePredicate,
            newHandle.getEnforcedConstraint(),
            newHandle.getBucketHandle(),
            newHandle.getBucketFilter(),
            newHandle.getAnalyzePartitionValues(),
            predicateColumns,
            Optional.ofNullable(newEffectivePredicates),
            isSuitableToPush,
            newHandle.getOffloadExpression());
    if (pushPartitionsOnly && handle.getPartitions().equals(newHandle.getPartitions()) && handle.getCompactEffectivePredicate().equals(newHandle.getCompactEffectivePredicate()) && handle.getBucketFilter().equals(newHandle.getBucketFilter())) {
        // nothing new was pushed down, so report that the filter could not be applied further
        return Optional.empty();
    }
    if (!pushPartitionsOnly && isSuitableToPush) {
        // everything is pushed into the scan, so no remaining filter is left for the engine
        return Optional.of(new ConstraintApplicationResult<>(newHandle, TupleDomain.all()));
    }
    // note that all unenforced constraints will still be applied by the filter operator
    return Optional.of(new ConstraintApplicationResult<>(newHandle, partitionResult.getUnenforcedConstraint()));
}
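On the caller's side, the result is consumed roughly as follows. This is a hedged sketch against the standard SPI types; the six-argument applyFilter overload is specific to this fork, and the variable names are illustrative.

// hedged sketch: how an engine-side caller might consume the result of applyFilter
Optional<ConstraintApplicationResult<ConnectorTableHandle>> result =
        metadata.applyFilter(session, tableHandle, constraint, ImmutableList.of(), ImmutableSet.of(), true);
if (result.isPresent()) {
    ConnectorTableHandle newHandle = result.get().getHandle();
    // whatever the connector could not enforce must still be filtered above the scan
    TupleDomain<ColumnHandle> remainingFilter = result.get().getRemainingFilter();
}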
Use of io.prestosql.spi.connector.Constraint in project boostkit-bigdata by kunpengcompute.
The class HiveMetadata, method getTableStatistics.
@Override
public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTableHandle tableHandle, Constraint constraint, boolean includeColumnStatistics) {
    if (!HiveSessionProperties.isStatisticsEnabled(session)) {
        return TableStatistics.empty();
    }
    SchemaTableName tableName = ((HiveTableHandle) tableHandle).getSchemaTableName();
    Table table = metastore.getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(tableName));
    // statistics are reported only for visible (non-hidden) columns
    Map<String, ColumnHandle> columns = getColumnHandles(table).entrySet().stream()
            .filter(entry -> !((HiveColumnHandle) entry.getValue()).isHidden())
            .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue));
    Map<String, Type> columnTypes = columns.entrySet().stream()
            .collect(toImmutableMap(Map.Entry::getKey, entry -> getColumnMetadata(session, tableHandle, entry.getValue()).getType()));
    // narrow the statistics to the partitions that match the constraint
    HivePartitionResult partitionResult = partitionManager.getPartitions(metastore, new HiveIdentity(session), tableHandle, constraint, table);
    List<HivePartition> partitions = partitionManager.getPartitionsAsList(partitionResult);
    return hiveStatisticsProvider.getTableStatistics(session, ((HiveTableHandle) tableHandle).getSchemaTableName(), columns, columnTypes, partitions, includeColumnStatistics, table);
}
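A Constraint can carry a row-level predicate in addition to the TupleDomain summary, and getTableStatistics callers may pass either form. A minimal sketch, again using the hypothetical TestingColumnHandle from the first example:

import io.prestosql.spi.predicate.NullableValue;
import java.util.Map;
import java.util.function.Predicate;

// hedged sketch: a Constraint with both a summary and a row-level predicate
ColumnHandle partKey = new TestingColumnHandle("part_key");
TupleDomain<ColumnHandle> summary = TupleDomain.withColumnDomains(
        ImmutableMap.of(partKey, Domain.singleValue(BIGINT, 5L)));
Predicate<Map<ColumnHandle, NullableValue>> predicate = bindings -> {
    NullableValue value = bindings.get(partKey);
    // keep only bindings whose key is present and non-null
    return value != null && !value.isNull();
};
Constraint constraint = new Constraint(summary, predicate);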