Use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.
From the class IcebergPageSourceProvider, method createOrcPageSource.
private static ReaderPageSource createOrcPageSource(
        HdfsEnvironment hdfsEnvironment,
        ConnectorIdentity identity,
        Configuration configuration,
        Path path,
        long start,
        long length,
        long fileSize,
        List<IcebergColumnHandle> columns,
        TupleDomain<IcebergColumnHandle> effectivePredicate,
        OrcReaderOptions options,
        FileFormatDataSourceStats stats,
        TypeManager typeManager,
        Optional<NameMapping> nameMapping)
{
    OrcDataSource orcDataSource = null;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(identity, path, configuration);
        FSDataInputStream inputStream = hdfsEnvironment.doAs(identity, () -> fileSystem.open(path));
        orcDataSource = new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), fileSize, options, inputStream, stats);
        OrcReader reader = OrcReader.createOrcReader(orcDataSource, options)
                .orElseThrow(() -> new TrinoException(ICEBERG_BAD_DATA, "ORC file is zero length"));
        List<OrcColumn> fileColumns = reader.getRootColumn().getNestedColumns();
        if (nameMapping.isPresent() && !hasIds(reader.getRootColumn())) {
            fileColumns = fileColumns.stream()
                    .map(orcColumn -> setMissingFieldIds(orcColumn, nameMapping.get(), ImmutableList.of(orcColumn.getColumnName())))
                    .collect(toImmutableList());
        }
        Map<Integer, OrcColumn> fileColumnsByIcebergId = mapIdsToOrcFileColumns(fileColumns);
        TupleDomainOrcPredicateBuilder predicateBuilder = TupleDomainOrcPredicate.builder()
                .setBloomFiltersEnabled(options.isBloomFiltersEnabled());
        Map<IcebergColumnHandle, Domain> effectivePredicateDomains = effectivePredicate.getDomains()
                .orElseThrow(() -> new IllegalArgumentException("Effective predicate is none"));
        Optional<ReaderColumns> columnProjections = projectColumns(columns);
        Map<Integer, List<List<Integer>>> projectionsByFieldId = columns.stream()
                .collect(groupingBy(
                        column -> column.getBaseColumnIdentity().getId(),
                        mapping(IcebergColumnHandle::getPath, toUnmodifiableList())));
        List<IcebergColumnHandle> readColumns = columnProjections
                .map(readerColumns -> (List<IcebergColumnHandle>) readerColumns.get().stream()
                        .map(IcebergColumnHandle.class::cast)
                        .collect(toImmutableList()))
                .orElse(columns);
        List<OrcColumn> fileReadColumns = new ArrayList<>(readColumns.size());
        List<Type> fileReadTypes = new ArrayList<>(readColumns.size());
        List<ProjectedLayout> projectedLayouts = new ArrayList<>(readColumns.size());
        List<ColumnAdaptation> columnAdaptations = new ArrayList<>(readColumns.size());
        for (IcebergColumnHandle column : readColumns) {
            verify(column.isBaseColumn(), "Column projections must be based from a root column");
            OrcColumn orcColumn = fileColumnsByIcebergId.get(column.getId());
            if (orcColumn != null) {
                Type readType = getOrcReadType(column.getType(), typeManager);
                if (column.getType() == UUID && !"UUID".equals(orcColumn.getAttributes().get(ICEBERG_BINARY_TYPE))) {
                    throw new TrinoException(ICEBERG_BAD_DATA, format("Expected ORC column for UUID data to be annotated with %s=UUID: %s", ICEBERG_BINARY_TYPE, orcColumn));
                }
                List<List<Integer>> fieldIdProjections = projectionsByFieldId.get(column.getId());
                ProjectedLayout projectedLayout = IcebergOrcProjectedLayout.createProjectedLayout(orcColumn, fieldIdProjections);
                int sourceIndex = fileReadColumns.size();
                columnAdaptations.add(ColumnAdaptation.sourceColumn(sourceIndex));
                fileReadColumns.add(orcColumn);
                fileReadTypes.add(readType);
                projectedLayouts.add(projectedLayout);
                for (Map.Entry<IcebergColumnHandle, Domain> domainEntry : effectivePredicateDomains.entrySet()) {
                    IcebergColumnHandle predicateColumn = domainEntry.getKey();
                    OrcColumn predicateOrcColumn = fileColumnsByIcebergId.get(predicateColumn.getId());
                    if (predicateOrcColumn != null && column.getColumnIdentity().equals(predicateColumn.getBaseColumnIdentity())) {
                        predicateBuilder.addColumn(predicateOrcColumn.getColumnId(), domainEntry.getValue());
                    }
                }
            }
            else {
                columnAdaptations.add(ColumnAdaptation.nullColumn(column.getType()));
            }
        }
        AggregatedMemoryContext memoryUsage = newSimpleAggregatedMemoryContext();
        OrcDataSourceId orcDataSourceId = orcDataSource.getId();
        OrcRecordReader recordReader = reader.createRecordReader(
                fileReadColumns,
                fileReadTypes,
                projectedLayouts,
                predicateBuilder.build(),
                start,
                length,
                UTC,
                memoryUsage,
                INITIAL_BATCH_SIZE,
                exception -> handleException(orcDataSourceId, exception),
                new IdBasedFieldMapperFactory(readColumns));
        return new ReaderPageSource(
                new OrcPageSource(recordReader, columnAdaptations, orcDataSource, Optional.empty(), Optional.empty(), memoryUsage, stats),
                columnProjections);
    }
    catch (Exception e) {
        if (orcDataSource != null) {
            try {
                orcDataSource.close();
            }
            catch (IOException ignored) {
            }
        }
        if (e instanceof TrinoException) {
            throw (TrinoException) e;
        }
        String message = format("Error opening Iceberg split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e instanceof BlockMissingException) {
            throw new TrinoException(ICEBERG_MISSING_DATA, message, e);
        }
        throw new TrinoException(ICEBERG_CANNOT_OPEN_SPLIT, message, e);
    }
}
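The page source above unpacks the effective predicate with TupleDomain.getDomains() and hands each column's Domain to the TupleDomainOrcPredicateBuilder so ORC stripes and row groups can be skipped. The following standalone sketch shows that unpacking step against the io.trino.spi.predicate API; the String column keys, class name, and values are made up for illustration.

import static io.trino.spi.type.BigintType.BIGINT;

import com.google.common.collect.ImmutableMap;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;
import java.util.Map;

public class EffectivePredicateSketch
{
    public static void main(String[] args)
    {
        // Hypothetical predicate: id = 42, keyed by column name instead of an IcebergColumnHandle
        TupleDomain<String> effectivePredicate = TupleDomain.withColumnDomains(
                ImmutableMap.of("id", Domain.singleValue(BIGINT, 42L)));

        // getDomains() is empty only for TupleDomain.none(); the page source treats that case as a bug
        Map<String, Domain> domains = effectivePredicate.getDomains()
                .orElseThrow(() -> new IllegalArgumentException("Effective predicate is none"));

        // Each entry would be handed to TupleDomainOrcPredicateBuilder.addColumn in the real code
        domains.forEach((column, domain) ->
                System.out.printf("column=%s nullAllowed=%s domain=%s%n", column, domain.isNullAllowed(), domain));
    }
}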
Use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.
From the class IcebergMetadata, method applyFilter.
@Override
public Optional<ConstraintApplicationResult<ConnectorTableHandle>> applyFilter(ConnectorSession session, ConnectorTableHandle handle, Constraint constraint)
{
    IcebergTableHandle table = (IcebergTableHandle) handle;
    Table icebergTable = catalog.loadTable(session, table.getSchemaTableName());
    Set<Integer> partitionSourceIds = identityPartitionColumnsInAllSpecs(icebergTable);
    BiPredicate<IcebergColumnHandle, Domain> isIdentityPartition = (column, domain) -> partitionSourceIds.contains(column.getId());
    TupleDomain<IcebergColumnHandle> newEnforcedConstraint = constraint.getSummary()
            .transformKeys(IcebergColumnHandle.class::cast)
            .filter(isIdentityPartition)
            .intersect(table.getEnforcedPredicate());
    TupleDomain<IcebergColumnHandle> remainingConstraint = constraint.getSummary()
            .transformKeys(IcebergColumnHandle.class::cast)
            .filter(isIdentityPartition.negate());
    TupleDomain<IcebergColumnHandle> newUnenforcedConstraint = remainingConstraint
            .filter((columnHandle, predicate) -> !isStructuralType(columnHandle.getType()))
            .intersect(table.getUnenforcedPredicate());
    if (newEnforcedConstraint.equals(table.getEnforcedPredicate()) && newUnenforcedConstraint.equals(table.getUnenforcedPredicate())) {
        return Optional.empty();
    }
    return Optional.of(new ConstraintApplicationResult<>(
            new IcebergTableHandle(
                    table.getSchemaName(),
                    table.getTableName(),
                    table.getTableType(),
                    table.getSnapshotId(),
                    newUnenforcedConstraint,
                    newEnforcedConstraint,
                    table.getProjectedColumns(),
                    table.getNameMappingJson()),
            remainingConstraint.transformKeys(ColumnHandle.class::cast),
            false));
}
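applyFilter splits the constraint summary with TupleDomain.filter: domains on identity-partition columns become the enforced predicate and everything else stays unenforced, each part intersected with what the table handle already carries. A minimal sketch of that split, assuming plain String column keys and an invented partition-column set:

import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.VarcharType.VARCHAR;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import io.airlift.slice.Slices;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;
import java.util.Set;
import java.util.function.BiPredicate;

public class ApplyFilterSketch
{
    public static void main(String[] args)
    {
        // Hypothetical summary: ds = '2021-01-01' AND id = 7, where only "ds" is an identity partition column
        TupleDomain<String> summary = TupleDomain.withColumnDomains(ImmutableMap.of(
                "ds", Domain.singleValue(VARCHAR, Slices.utf8Slice("2021-01-01")),
                "id", Domain.singleValue(BIGINT, 7L)));
        Set<String> partitionColumns = ImmutableSet.of("ds");

        BiPredicate<String, Domain> isIdentityPartition = (column, domain) -> partitionColumns.contains(column);

        // The enforced part can be honored entirely by split pruning; the rest is left for Trino to re-evaluate
        TupleDomain<String> enforced = summary.filter(isIdentityPartition);
        TupleDomain<String> unenforced = summary.filter(isIdentityPartition.negate());

        System.out.println("enforced   = " + enforced);
        System.out.println("unenforced = " + unenforced);
    }
}

The method only returns a new handle when one of the two predicates actually changed, which is why it first compares against table.getEnforcedPredicate() and table.getUnenforcedPredicate().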
Use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.
From the class ColumnJdbcTable, method cursor.
@Override
public RecordCursor cursor(ConnectorTransactionHandle transactionHandle, ConnectorSession connectorSession, TupleDomain<Integer> constraint)
{
    Builder table = InMemoryRecordSet.builder(METADATA);
    if (constraint.isNone()) {
        return table.build().cursor();
    }
    Session session = ((FullConnectorSession) connectorSession).getSession();
    boolean omitDateTimeTypePrecision = isOmitDateTimeTypePrecision(session);
    Optional<String> catalogFilter = tryGetSingleVarcharValue(constraint, 0);
    Optional<String> schemaFilter = tryGetSingleVarcharValue(constraint, 1);
    Optional<String> tableFilter = tryGetSingleVarcharValue(constraint, 2);
    Domain catalogDomain = constraint.getDomains().get().getOrDefault(0, Domain.all(createUnboundedVarcharType()));
    Domain schemaDomain = constraint.getDomains().get().getOrDefault(1, Domain.all(createUnboundedVarcharType()));
    Domain tableDomain = constraint.getDomains().get().getOrDefault(2, Domain.all(createUnboundedVarcharType()));
    if (isNonLowercase(schemaFilter) || isNonLowercase(tableFilter)) {
        // Non-lowercase predicate will never match a lowercase name (until TODO https://github.com/trinodb/trino/issues/17)
        return table.build().cursor();
    }
    for (String catalog : listCatalogs(session, metadata, accessControl, catalogFilter).keySet()) {
        if (!catalogDomain.includesNullableValue(utf8Slice(catalog))) {
            continue;
        }
        if ((schemaDomain.isAll() && tableDomain.isAll()) || schemaFilter.isPresent()) {
            QualifiedTablePrefix tablePrefix = tablePrefix(catalog, schemaFilter, tableFilter);
            Map<SchemaTableName, List<ColumnMetadata>> tableColumns = listTableColumns(session, metadata, accessControl, tablePrefix);
            addColumnsRow(table, catalog, tableColumns, omitDateTimeTypePrecision);
        }
        else {
            Collection<String> schemas = listSchemas(session, metadata, accessControl, catalog, schemaFilter);
            for (String schema : schemas) {
                if (!schemaDomain.includesNullableValue(utf8Slice(schema))) {
                    continue;
                }
                QualifiedTablePrefix tablePrefix = tableFilter.isPresent()
                        ? new QualifiedTablePrefix(catalog, schema, tableFilter.get())
                        : new QualifiedTablePrefix(catalog, schema);
                Set<SchemaTableName> tables = listTables(session, metadata, accessControl, tablePrefix);
                for (SchemaTableName schemaTableName : tables) {
                    String tableName = schemaTableName.getTableName();
                    if (!tableDomain.includesNullableValue(utf8Slice(tableName))) {
                        continue;
                    }
                    Map<SchemaTableName, List<ColumnMetadata>> tableColumns = listTableColumns(session, metadata, accessControl, new QualifiedTablePrefix(catalog, schema, tableName));
                    addColumnsRow(table, catalog, tableColumns, omitDateTimeTypePrecision);
                }
            }
        }
    }
    return table.build().cursor();
}
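The cursor prunes catalogs, schemas, and tables by testing each candidate name against the corresponding Domain with includesNullableValue, and falls back to Domain.all for key positions the constraint does not mention. A self-contained sketch of that check, using invented catalog and schema names:

import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.spi.type.VarcharType.createUnboundedVarcharType;

import com.google.common.collect.ImmutableMap;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;

public class MetadataPruningSketch
{
    public static void main(String[] args)
    {
        // Hypothetical constraint on key 1 (the schema name); key 0 (the catalog) is unconstrained.
        // The real cursor checks constraint.isNone() before calling getDomains().get().
        TupleDomain<Integer> constraint = TupleDomain.withColumnDomains(
                ImmutableMap.of(1, Domain.singleValue(createUnboundedVarcharType(), utf8Slice("tpch"))));

        Domain catalogDomain = constraint.getDomains().get().getOrDefault(0, Domain.all(createUnboundedVarcharType()));
        Domain schemaDomain = constraint.getDomains().get().getOrDefault(1, Domain.all(createUnboundedVarcharType()));

        System.out.println(catalogDomain.includesNullableValue(utf8Slice("hive")));  // true: no catalog filter
        System.out.println(schemaDomain.includesNullableValue(utf8Slice("tpch")));   // true: matches the filter
        System.out.println(schemaDomain.includesNullableValue(utf8Slice("tpcds")));  // false: pruned
    }
}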
Use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.
From the class DynamicFilterService, method translateSummaryToTupleDomain.
private TupleDomain<ColumnHandle> translateSummaryToTupleDomain(
        DynamicFilterId filterId,
        DynamicFilterContext dynamicFilterContext,
        Multimap<DynamicFilterId, DynamicFilters.Descriptor> descriptorMultimap,
        Map<Symbol, ColumnHandle> columnHandles,
        TypeProvider typeProvider)
{
    Collection<DynamicFilters.Descriptor> descriptors = descriptorMultimap.get(filterId);
    checkState(descriptors != null, "No descriptors for dynamic filter %s", filterId);
    Domain summary = dynamicFilterContext.getDynamicFilterSummaries().get(filterId);
    return TupleDomain.withColumnDomains(descriptors.stream()
            .collect(toImmutableMap(
                    descriptor -> {
                        Symbol probeSymbol = Symbol.from(descriptor.getInput());
                        return requireNonNull(columnHandles.get(probeSymbol), () -> format("Missing probe column for %s", probeSymbol));
                    },
                    descriptor -> {
                        Type targetType = typeProvider.get(Symbol.from(descriptor.getInput()));
                        Domain updatedSummary = descriptor.applyComparison(summary);
                        if (!updatedSummary.getType().equals(targetType)) {
                            return applySaturatedCasts(metadata, functionManager, typeOperators, dynamicFilterContext.getSession(), updatedSummary, targetType);
                        }
                        return updatedSummary;
                    })));
}
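translateSummaryToTupleDomain rebuilds a TupleDomain by collecting one Domain per probe column, casting the summary when its type does not match the probe column's type. A reduced sketch of the collection step, with hypothetical probe column names and a made-up bigint summary standing in for the real descriptors:

import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static io.trino.spi.type.BigintType.BIGINT;

import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.predicate.ValueSet;
import java.util.List;

public class DynamicFilterTranslationSketch
{
    public static void main(String[] args)
    {
        // Hypothetical dynamic-filter summary collected from the build side of a join: value IN (1, 5, 9)
        Domain summary = Domain.create(ValueSet.of(BIGINT, 1L, 5L, 9L), false);

        // Hypothetical probe columns that the descriptors would map this filter onto
        List<String> probeColumns = List.of("orders.custkey", "lineitem.orderkey");

        TupleDomain<String> translated = TupleDomain.withColumnDomains(probeColumns.stream()
                .collect(toImmutableMap(column -> column, column -> summary)));

        System.out.println(translated);
        System.out.println(summary.getType());  // the real code applies a saturated cast when this differs from the probe column's type
    }
}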
Use of io.trino.spi.predicate.TupleDomain in project trino by trinodb.
From the class CassandraPartitionManager, method getPartitionKeysList.
private static List<Set<Object>> getPartitionKeysList(CassandraTable table, TupleDomain<ColumnHandle> tupleDomain)
{
    ImmutableList.Builder<Set<Object>> partitionColumnValues = ImmutableList.builder();
    for (CassandraColumnHandle columnHandle : table.getPartitionKeyColumns()) {
        Domain domain = tupleDomain.getDomains().get().get(columnHandle);
        // if there is no constraint on a partition key, return an empty set
        if (domain == null) {
            return ImmutableList.of();
        }
        // todo does cassandra allow null partition keys?
        if (domain.isNullAllowed()) {
            return ImmutableList.of();
        }
        Set<Object> values = domain.getValues().getValuesProcessor().transform(
                ranges -> {
                    ImmutableSet.Builder<Object> columnValues = ImmutableSet.builder();
                    for (Range range : ranges.getOrderedRanges()) {
                        // if the range is not a single value, we cannot perform partition pruning
                        if (!range.isSingleValue()) {
                            return ImmutableSet.of();
                        }
                        Object value = range.getSingleValue();
                        CassandraType valueType = columnHandle.getCassandraType();
                        if (valueType.isSupportedPartitionKey()) {
                            columnValues.add(value);
                        }
                    }
                    return columnValues.build();
                },
                discreteValues -> {
                    if (discreteValues.isInclusive()) {
                        return ImmutableSet.copyOf(discreteValues.getValues());
                    }
                    return ImmutableSet.of();
                },
                allOrNone -> ImmutableSet.of());
        partitionColumnValues.add(values);
    }
    return partitionColumnValues.build();
}
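The partition-key extraction relies on ValuesProcessor.transform to handle the three shapes a ValueSet can take: ordered ranges, discrete values, or all/none. A standalone sketch of the same three-way transform, pulling single values out of an invented bigint domain:

import static io.trino.spi.type.BigintType.BIGINT;

import com.google.common.collect.ImmutableSet;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.Range;
import io.trino.spi.predicate.ValueSet;
import java.util.Set;

public class PartitionKeyExtractionSketch
{
    public static void main(String[] args)
    {
        // Hypothetical constraint on a partition key: value IN (10, 20), nulls not allowed
        Domain domain = Domain.create(ValueSet.ofRanges(Range.equal(BIGINT, 10L), Range.equal(BIGINT, 20L)), false);

        Set<Object> values = domain.getValues().getValuesProcessor().transform(
                ranges -> {
                    ImmutableSet.Builder<Object> columnValues = ImmutableSet.builder();
                    for (Range range : ranges.getOrderedRanges()) {
                        if (!range.isSingleValue()) {
                            return ImmutableSet.of();  // a real range defeats partition pruning
                        }
                        columnValues.add(range.getSingleValue());
                    }
                    return columnValues.build();
                },
                discreteValues -> {
                    if (discreteValues.isInclusive()) {
                        return ImmutableSet.copyOf(discreteValues.getValues());
                    }
                    return ImmutableSet.of();
                },
                allOrNone -> ImmutableSet.of());

        System.out.println(values);  // [10, 20]
    }
}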