Usage of io.trino.spi.predicate.Domain in the Trino project (trinodb): class IcebergPageSourceProvider, method createOrcPageSource.
/**
 * Creates a page source that reads a single Iceberg split from an ORC file.
 * <p>
 * Requested columns are matched to ORC file columns by Iceberg field ID. Files
 * whose ORC schema lacks field IDs (e.g. migrated data) have IDs synthesized
 * from the table's name mapping. Columns with no matching file column are
 * filled with nulls (columns added to the table after the file was written).
 *
 * @param start byte offset of this split within the file
 * @param length number of bytes of the file covered by this split
 * @param columns columns requested by the engine; may contain dereference projections
 * @param effectivePredicate predicate used to prune ORC row groups via file statistics
 * @param nameMapping optional name-to-field-ID mapping used when the file has no IDs
 * @throws TrinoException ICEBERG_BAD_DATA for malformed files, ICEBERG_MISSING_DATA
 *         for missing HDFS blocks, ICEBERG_CANNOT_OPEN_SPLIT for other failures
 */
private static ReaderPageSource createOrcPageSource(HdfsEnvironment hdfsEnvironment, ConnectorIdentity identity, Configuration configuration, Path path, long start, long length, long fileSize, List<IcebergColumnHandle> columns, TupleDomain<IcebergColumnHandle> effectivePredicate, OrcReaderOptions options, FileFormatDataSourceStats stats, TypeManager typeManager, Optional<NameMapping> nameMapping) {
OrcDataSource orcDataSource = null;
try {
// Open the file as the session identity (doAs covers impersonation setups).
FileSystem fileSystem = hdfsEnvironment.getFileSystem(identity, path, configuration);
FSDataInputStream inputStream = hdfsEnvironment.doAs(identity, () -> fileSystem.open(path));
orcDataSource = new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), fileSize, options, inputStream, stats);
// createOrcReader returns empty for a zero-length file, which is invalid here.
OrcReader reader = OrcReader.createOrcReader(orcDataSource, options).orElseThrow(() -> new TrinoException(ICEBERG_BAD_DATA, "ORC file is zero length"));
List<OrcColumn> fileColumns = reader.getRootColumn().getNestedColumns();
// If the file carries no Iceberg field IDs, derive them from the table's name mapping.
if (nameMapping.isPresent() && !hasIds(reader.getRootColumn())) {
fileColumns = fileColumns.stream().map(orcColumn -> setMissingFieldIds(orcColumn, nameMapping.get(), ImmutableList.of(orcColumn.getColumnName()))).collect(toImmutableList());
}
// Index the file's columns by Iceberg field ID for matching below.
Map<Integer, OrcColumn> fileColumnsByIcebergId = mapIdsToOrcFileColumns(fileColumns);
TupleDomainOrcPredicateBuilder predicateBuilder = TupleDomainOrcPredicate.builder().setBloomFiltersEnabled(options.isBloomFiltersEnabled());
Map<IcebergColumnHandle, Domain> effectivePredicateDomains = effectivePredicate.getDomains().orElseThrow(() -> new IllegalArgumentException("Effective predicate is none"));
// Reduce dereference projections to the set of base columns that must be read.
Optional<ReaderColumns> columnProjections = projectColumns(columns);
// Group the requested dereference paths by their base column's field ID.
Map<Integer, List<List<Integer>>> projectionsByFieldId = columns.stream().collect(groupingBy(column -> column.getBaseColumnIdentity().getId(), mapping(IcebergColumnHandle::getPath, toUnmodifiableList())));
List<IcebergColumnHandle> readColumns = columnProjections.map(readerColumns -> (List<IcebergColumnHandle>) readerColumns.get().stream().map(IcebergColumnHandle.class::cast).collect(toImmutableList())).orElse(columns);
List<OrcColumn> fileReadColumns = new ArrayList<>(readColumns.size());
List<Type> fileReadTypes = new ArrayList<>(readColumns.size());
List<ProjectedLayout> projectedLayouts = new ArrayList<>(readColumns.size());
List<ColumnAdaptation> columnAdaptations = new ArrayList<>(readColumns.size());
for (IcebergColumnHandle column : readColumns) {
verify(column.isBaseColumn(), "Column projections must be based from a root column");
OrcColumn orcColumn = fileColumnsByIcebergId.get(column.getId());
if (orcColumn != null) {
Type readType = getOrcReadType(column.getType(), typeManager);
// Iceberg stores UUIDs as ORC binary; require the annotation so arbitrary binary is not misread as UUID.
if (column.getType() == UUID && !"UUID".equals(orcColumn.getAttributes().get(ICEBERG_BINARY_TYPE))) {
throw new TrinoException(ICEBERG_BAD_DATA, format("Expected ORC column for UUID data to be annotated with %s=UUID: %s", ICEBERG_BINARY_TYPE, orcColumn));
}
// Restrict nested reads to only the dereferenced sub-fields of this base column.
List<List<Integer>> fieldIdProjections = projectionsByFieldId.get(column.getId());
ProjectedLayout projectedLayout = IcebergOrcProjectedLayout.createProjectedLayout(orcColumn, fieldIdProjections);
int sourceIndex = fileReadColumns.size();
columnAdaptations.add(ColumnAdaptation.sourceColumn(sourceIndex));
fileReadColumns.add(orcColumn);
fileReadTypes.add(readType);
projectedLayouts.add(projectedLayout);
// Push down every predicate domain whose base column is this read column,
// resolving the predicate column's own field ID to its ORC column (may be a nested field).
for (Map.Entry<IcebergColumnHandle, Domain> domainEntry : effectivePredicateDomains.entrySet()) {
IcebergColumnHandle predicateColumn = domainEntry.getKey();
OrcColumn predicateOrcColumn = fileColumnsByIcebergId.get(predicateColumn.getId());
if (predicateOrcColumn != null && column.getColumnIdentity().equals(predicateColumn.getBaseColumnIdentity())) {
predicateBuilder.addColumn(predicateOrcColumn.getColumnId(), domainEntry.getValue());
}
}
} else {
// Column not present in this file: produce nulls (schema evolution).
columnAdaptations.add(ColumnAdaptation.nullColumn(column.getType()));
}
}
AggregatedMemoryContext memoryUsage = newSimpleAggregatedMemoryContext();
// Capture the ID before the lambda so the exception handler does not hold the data source itself.
OrcDataSourceId orcDataSourceId = orcDataSource.getId();
OrcRecordReader recordReader = reader.createRecordReader(fileReadColumns, fileReadTypes, projectedLayouts, predicateBuilder.build(), start, length, UTC, memoryUsage, INITIAL_BATCH_SIZE, exception -> handleException(orcDataSourceId, exception), new IdBasedFieldMapperFactory(readColumns));
return new ReaderPageSource(new OrcPageSource(recordReader, columnAdaptations, orcDataSource, Optional.empty(), Optional.empty(), memoryUsage, stats), columnProjections);
} catch (Exception e) {
// On any failure, close the data source (if opened) before translating the exception.
if (orcDataSource != null) {
try {
orcDataSource.close();
} catch (IOException ignored) {
}
}
if (e instanceof TrinoException) {
throw (TrinoException) e;
}
String message = format("Error opening Iceberg split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
if (e instanceof BlockMissingException) {
throw new TrinoException(ICEBERG_MISSING_DATA, message, e);
}
throw new TrinoException(ICEBERG_CANNOT_OPEN_SPLIT, message, e);
}
}
Usage of io.trino.spi.predicate.Domain in the Trino project (trinodb): class IcebergMetadata, method applyFilter.
/**
 * Splits the constraint into the part Iceberg can enforce itself (domains on
 * identity-partition columns present in every partition spec, usable for
 * partition pruning) and the remainder, which is kept as an unenforced
 * predicate for row-level filtering. Returns empty when nothing changed.
 */
@Override
public Optional<ConstraintApplicationResult<ConnectorTableHandle>> applyFilter(ConnectorSession session, ConnectorTableHandle handle, Constraint constraint)
{
    IcebergTableHandle tableHandle = (IcebergTableHandle) handle;
    Table icebergTable = catalog.loadTable(session, tableHandle.getSchemaTableName());

    // Only columns that are identity-partitioned in *every* spec can be enforced by pruning.
    Set<Integer> identityPartitionFieldIds = identityPartitionColumnsInAllSpecs(icebergTable);
    BiPredicate<IcebergColumnHandle, Domain> enforceableByPartitioning = (column, domain) -> identityPartitionFieldIds.contains(column.getId());

    TupleDomain<IcebergColumnHandle> summary = constraint.getSummary().transformKeys(IcebergColumnHandle.class::cast);
    TupleDomain<IcebergColumnHandle> newEnforcedConstraint = summary.filter(enforceableByPartitioning).intersect(tableHandle.getEnforcedPredicate());
    TupleDomain<IcebergColumnHandle> remainingConstraint = summary.filter(enforceableByPartitioning.negate());
    // Structural (row/array/map) types are not usable for file-level filtering; drop them.
    TupleDomain<IcebergColumnHandle> newUnenforcedConstraint = remainingConstraint.filter((columnHandle, predicate) -> !isStructuralType(columnHandle.getType())).intersect(tableHandle.getUnenforcedPredicate());

    if (newEnforcedConstraint.equals(tableHandle.getEnforcedPredicate()) && newUnenforcedConstraint.equals(tableHandle.getUnenforcedPredicate())) {
        // No progress; tell the engine the filter could not be (further) absorbed.
        return Optional.empty();
    }
    return Optional.of(new ConstraintApplicationResult<>(
            new IcebergTableHandle(tableHandle.getSchemaName(), tableHandle.getTableName(), tableHandle.getTableType(), tableHandle.getSnapshotId(), newUnenforcedConstraint, newEnforcedConstraint, tableHandle.getProjectedColumns(), tableHandle.getNameMappingJson()),
            remainingConstraint.transformKeys(ColumnHandle.class::cast),
            false));
}
Usage of io.trino.spi.predicate.Domain in the Trino project (trinodb): class ColumnJdbcTable, method cursor.
/**
 * Builds the cursor for the JDBC columns system table, using the constraint on
 * the first three fields (catalog, schema, table) to prune metadata listings.
 * <p>
 * When a concrete schema filter is available (or neither schema nor table is
 * constrained), columns are listed with a single prefix query; otherwise
 * schemas and tables are enumerated and filtered individually against the
 * constraint domains.
 */
@Override
public RecordCursor cursor(ConnectorTransactionHandle transactionHandle, ConnectorSession connectorSession, TupleDomain<Integer> constraint) {
Builder table = InMemoryRecordSet.builder(METADATA);
// Contradictory constraint: no row can match, return an empty cursor.
if (constraint.isNone()) {
return table.build().cursor();
}
Session session = ((FullConnectorSession) connectorSession).getSession();
boolean omitDateTimeTypePrecision = isOmitDateTimeTypePrecision(session);
// Single-value filters (if present) allow cheap prefix-based listings; keys 0/1/2 are catalog/schema/table.
Optional<String> catalogFilter = tryGetSingleVarcharValue(constraint, 0);
Optional<String> schemaFilter = tryGetSingleVarcharValue(constraint, 1);
Optional<String> tableFilter = tryGetSingleVarcharValue(constraint, 2);
// Full domains for membership checks when a single value is not available.
Domain catalogDomain = constraint.getDomains().get().getOrDefault(0, Domain.all(createUnboundedVarcharType()));
Domain schemaDomain = constraint.getDomains().get().getOrDefault(1, Domain.all(createUnboundedVarcharType()));
Domain tableDomain = constraint.getDomains().get().getOrDefault(2, Domain.all(createUnboundedVarcharType()));
if (isNonLowercase(schemaFilter) || isNonLowercase(tableFilter)) {
// Non-lowercase predicate will never match a lowercase name (until TODO https://github.com/trinodb/trino/issues/17)
return table.build().cursor();
}
for (String catalog : listCatalogs(session, metadata, accessControl, catalogFilter).keySet()) {
// listCatalogs only honors the single-value filter; re-check against the full domain.
if (!catalogDomain.includesNullableValue(utf8Slice(catalog))) {
continue;
}
if ((schemaDomain.isAll() && tableDomain.isAll()) || schemaFilter.isPresent()) {
// Either unconstrained or pinned to one schema: one prefix listing suffices.
QualifiedTablePrefix tablePrefix = tablePrefix(catalog, schemaFilter, tableFilter);
Map<SchemaTableName, List<ColumnMetadata>> tableColumns = listTableColumns(session, metadata, accessControl, tablePrefix);
addColumnsRow(table, catalog, tableColumns, omitDateTimeTypePrecision);
} else {
// Constrained but not to a single schema: enumerate schemas and filter each one.
Collection<String> schemas = listSchemas(session, metadata, accessControl, catalog, schemaFilter);
for (String schema : schemas) {
if (!schemaDomain.includesNullableValue(utf8Slice(schema))) {
continue;
}
QualifiedTablePrefix tablePrefix = tableFilter.isPresent() ? new QualifiedTablePrefix(catalog, schema, tableFilter.get()) : new QualifiedTablePrefix(catalog, schema);
Set<SchemaTableName> tables = listTables(session, metadata, accessControl, tablePrefix);
for (SchemaTableName schemaTableName : tables) {
String tableName = schemaTableName.getTableName();
if (!tableDomain.includesNullableValue(utf8Slice(tableName))) {
continue;
}
// List columns per table so access control and domain pruning apply at table granularity.
Map<SchemaTableName, List<ColumnMetadata>> tableColumns = listTableColumns(session, metadata, accessControl, new QualifiedTablePrefix(catalog, schema, tableName));
addColumnsRow(table, catalog, tableColumns, omitDateTimeTypePrecision);
}
}
}
}
return table.build().cursor();
}
Usage of io.trino.spi.predicate.Domain in the Trino project (trinodb): class DynamicFilterService, method collectDynamicFilters.
/**
 * Collects the per-task dynamic filter domains reported by the given stage and
 * registers those filters that are now complete with the query's filter context.
 * A filter is complete when any task reported an "all" domain, when it is a
 * replicated filter with a report from one task, or when every task of the
 * stage has reported.
 */
private void collectDynamicFilters(StageId stageId, Optional<Set<DynamicFilterId>> selectedFilters)
{
    DynamicFilterContext context = dynamicFilterContexts.get(stageId.getQueryId());
    if (context == null) {
        // query has been removed
        return;
    }
    OptionalInt taskCount = context.getNumberOfTasks(stageId);
    Map<DynamicFilterId, List<Domain>> completedFilters = context.getTaskDynamicFilters(stageId, selectedFilters).entrySet().stream()
            .filter(entry -> {
                List<Domain> taskDomains = entry.getValue();
                if (taskDomains.stream().anyMatch(Domain::isAll)) {
                    // An "all" domain makes the combined filter trivial; no need to wait for the remaining tasks.
                    return true;
                }
                if (!taskDomains.isEmpty() && context.getReplicatedDynamicFilters().contains(entry.getKey())) {
                    // Replicated filters are identical on every task, so a single report is sufficient.
                    checkState(taskDomains.size() == 1, "Replicated dynamic filter should be collected from single task");
                    return true;
                }
                // Otherwise the filter is complete only once every task of the source stage has reported.
                return taskCount.isPresent() && taskDomains.size() == taskCount.getAsInt();
            })
            .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue));
    context.addDynamicFilters(completedFilters);
}
Usage of io.trino.spi.predicate.Domain in the Trino project (trinodb): class DynamicFilterService, method translateSummaryToTupleDomain.
/**
 * Translates a collected dynamic filter summary into a TupleDomain keyed by the
 * probe-side column handles of every descriptor registered for the filter.
 * Each descriptor's comparison is applied to the summary, and the resulting
 * domain is saturated-cast when its type differs from the probe column's type.
 */
private TupleDomain<ColumnHandle> translateSummaryToTupleDomain(DynamicFilterId filterId, DynamicFilterContext dynamicFilterContext, Multimap<DynamicFilterId, DynamicFilters.Descriptor> descriptorMultimap, Map<Symbol, ColumnHandle> columnHandles, TypeProvider typeProvider)
{
    Collection<DynamicFilters.Descriptor> filterDescriptors = descriptorMultimap.get(filterId);
    checkState(filterDescriptors != null, "No descriptors for dynamic filter %s", filterId);
    Domain collectedSummary = dynamicFilterContext.getDynamicFilterSummaries().get(filterId);
    return TupleDomain.withColumnDomains(filterDescriptors.stream().collect(toImmutableMap(
            descriptor -> {
                // Map the descriptor's probe expression back to its column handle.
                Symbol probeSymbol = Symbol.from(descriptor.getInput());
                return requireNonNull(columnHandles.get(probeSymbol), () -> format("Missing probe column for %s", probeSymbol));
            },
            descriptor -> {
                Domain adjustedSummary = descriptor.applyComparison(collectedSummary);
                Type probeType = typeProvider.get(Symbol.from(descriptor.getInput()));
                // Cast with saturation when the collected domain's type differs from the probe column's type.
                return adjustedSummary.getType().equals(probeType)
                        ? adjustedSummary
                        : applySaturatedCasts(metadata, functionManager, typeOperators, dynamicFilterContext.getSession(), adjustedSummary, probeType);
            })));
}
Aggregations