Search in sources :

Example 96 with ColumnHandle

use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.

the class BaseIcebergConnectorTest method assertFilterPushdown.

private void assertFilterPushdown(QualifiedObjectName tableName, Map<String, Domain> filter, Map<String, Domain> expectedEnforcedPredicate, Map<String, Domain> expectedUnenforcedPredicate) {
    Metadata metadata = getQueryRunner().getMetadata();
    newTransaction().execute(getSession(), session -> {
        TableHandle table = metadata.getTableHandle(session, tableName).orElseThrow(() -> new TableNotFoundException(tableName.asSchemaTableName()));
        Map<String, ColumnHandle> columns = metadata.getColumnHandles(session, table);
        TupleDomain<ColumnHandle> domains = TupleDomain.withColumnDomains(filter.entrySet().stream().collect(toImmutableMap(entry -> columns.get(entry.getKey()), Map.Entry::getValue)));
        Optional<ConstraintApplicationResult<TableHandle>> result = metadata.applyFilter(session, table, new Constraint(domains));
        assertTrue(result.isEmpty() == (expectedUnenforcedPredicate == null && expectedEnforcedPredicate == null));
        if (result.isPresent()) {
            IcebergTableHandle newTable = (IcebergTableHandle) result.get().getHandle().getConnectorHandle();
            assertEquals(newTable.getEnforcedPredicate(), TupleDomain.withColumnDomains(expectedEnforcedPredicate.entrySet().stream().collect(toImmutableMap(entry -> columns.get(entry.getKey()), Map.Entry::getValue))));
            assertEquals(newTable.getUnenforcedPredicate(), TupleDomain.withColumnDomains(expectedUnenforcedPredicate.entrySet().stream().collect(toImmutableMap(entry -> columns.get(entry.getKey()), Map.Entry::getValue))));
        }
    });
}
Also used : SkipException(org.testng.SkipException) FileSystem(org.apache.hadoop.fs.FileSystem) Test(org.testng.annotations.Test) TestTable(io.trino.testing.sql.TestTable) BROADCAST(io.trino.sql.planner.OptimizerConfig.JoinDistributionType.BROADCAST) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Matcher(java.util.regex.Matcher) Map(java.util.Map) MaterializedRow(io.trino.testing.MaterializedRow) Path(java.nio.file.Path) Assert.assertFalse(org.testng.Assert.assertFalse) Assert.assertEquals(io.trino.testing.assertions.Assert.assertEquals) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) Assert.assertNotEquals(org.testng.Assert.assertNotEquals) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) Schema(org.apache.avro.Schema) DataProviders(io.trino.testing.DataProviders) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) DataFileWriter(org.apache.avro.file.DataFileWriter) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) Collectors.joining(java.util.stream.Collectors.joining) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) TestingSession.testSessionBuilder(io.trino.testing.TestingSession.testSessionBuilder) Stream(java.util.stream.Stream) ORC(io.trino.plugin.iceberg.IcebergFileFormat.ORC) Session(io.trino.Session) NullableValue(io.trino.spi.predicate.NullableValue) GenericData(org.apache.avro.generic.GenericData) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) String.join(java.lang.String.join) ColumnHandle(io.trino.spi.connector.ColumnHandle) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) LongStream(java.util.stream.LongStream) OperatorStats(io.trino.operator.OperatorStats) Language(org.intellij.lang.annotations.Language) Files(java.nio.file.Files) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) File(java.io.File) BaseConnectorTest(io.trino.testing.BaseConnectorTest) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) TableHandle(io.trino.metadata.TableHandle) QualifiedObjectName(io.trino.metadata.QualifiedObjectName) Paths(java.nio.file.Paths) QueryRunner(io.trino.testing.QueryRunner) Domain.multipleValues(io.trino.spi.predicate.Domain.multipleValues) QueryId(io.trino.spi.QueryId) TransactionBuilder.transaction(io.trino.transaction.TransactionBuilder.transaction) IntStream.range(java.util.stream.IntStream.range) MaterializedResult(io.trino.testing.MaterializedResult) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) PREFERRED_WRITE_PARTITIONING_MIN_NUMBER_OF_PARTITIONS(io.trino.SystemSessionProperties.PREFERRED_WRITE_PARTITIONING_MIN_NUMBER_OF_PARTITIONS) MoreCollectors.onlyElement(com.google.common.collect.MoreCollectors.onlyElement) ICEBERG_DOMAIN_COMPACTION_THRESHOLD(io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) QueryAssertions.assertEqualsIgnoreOrder(io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder) Locale(java.util.Locale) Iterables.concat(com.google.common.collect.Iterables.concat) TestingConnectorBehavior(io.trino.testing.TestingConnectorBehavior) TestTable.randomTableSuffix(io.trino.testing.sql.TestTable.randomTableSuffix) TpchTable(io.trino.tpch.TpchTable) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) Collections.nCopies(java.util.Collections.nCopies) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) ICEBERG_CATALOG(io.trino.plugin.iceberg.IcebergQueryRunner.ICEBERG_CATALOG) DataSize(io.airlift.units.DataSize) List(java.util.List) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) BIGINT(io.trino.spi.type.BigintType.BIGINT) Domain.singleValue(io.trino.spi.predicate.Domain.singleValue) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) DataFileReader(org.apache.avro.file.DataFileReader) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) IcebergQueryRunner.createIcebergQueryRunner(io.trino.plugin.iceberg.IcebergQueryRunner.createIcebergQueryRunner) LINE_ITEM(io.trino.tpch.TpchTable.LINE_ITEM) IntStream(java.util.stream.IntStream) Constraint(io.trino.spi.connector.Constraint) DataProvider(org.testng.annotations.DataProvider) PARQUET(io.trino.plugin.iceberg.IcebergFileFormat.PARQUET) ImmutableList(com.google.common.collect.ImmutableList) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) Objects.requireNonNull(java.util.Objects.requireNonNull) TableStatistics(io.trino.spi.statistics.TableStatistics) NoSuchElementException(java.util.NoSuchElementException) VerifyException(com.google.common.base.VerifyException) OutputStream(java.io.OutputStream) ResultWithQueryId(io.trino.testing.ResultWithQueryId) TupleDomain(io.trino.spi.predicate.TupleDomain) Consumer(java.util.function.Consumer) Assert.assertEventually(io.trino.testing.assertions.Assert.assertEventually) JOIN_DISTRIBUTION_TYPE(io.trino.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE) Metadata(io.trino.metadata.Metadata) Assert.assertTrue(org.testng.Assert.assertTrue) MaterializedResult.resultBuilder(io.trino.testing.MaterializedResult.resultBuilder) ColumnHandle(io.trino.spi.connector.ColumnHandle) Constraint(io.trino.spi.connector.Constraint) Metadata(io.trino.metadata.Metadata) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) TableHandle(io.trino.metadata.TableHandle) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 97 with ColumnHandle

use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.

the class TableStatisticsMaker method makeTableStatistics.

private TableStatistics makeTableStatistics(IcebergTableHandle tableHandle, Constraint constraint) {
    if (tableHandle.getSnapshotId().isEmpty() || constraint.getSummary().isNone()) {
        return TableStatistics.empty();
    }
    TupleDomain<IcebergColumnHandle> intersection = constraint.getSummary().transformKeys(IcebergColumnHandle.class::cast).intersect(tableHandle.getEnforcedPredicate());
    if (intersection.isNone()) {
        return TableStatistics.empty();
    }
    Schema icebergTableSchema = icebergTable.schema();
    List<Types.NestedField> columns = icebergTableSchema.columns();
    Map<Integer, Type.PrimitiveType> idToTypeMapping = primitiveFieldTypes(icebergTableSchema);
    List<PartitionField> partitionFields = icebergTable.spec().fields();
    List<Type> icebergPartitionTypes = partitionTypes(partitionFields, idToTypeMapping);
    List<IcebergColumnHandle> columnHandles = getColumns(icebergTableSchema, typeManager);
    Map<Integer, IcebergColumnHandle> idToColumnHandle = columnHandles.stream().collect(toUnmodifiableMap(IcebergColumnHandle::getId, identity()));
    ImmutableMap.Builder<Integer, ColumnFieldDetails> idToDetailsBuilder = ImmutableMap.builder();
    for (int index = 0; index < partitionFields.size(); index++) {
        PartitionField field = partitionFields.get(index);
        Type type = icebergPartitionTypes.get(index);
        idToDetailsBuilder.put(field.fieldId(), new ColumnFieldDetails(field, idToColumnHandle.get(field.sourceId()), type, toTrinoType(type, typeManager), type.typeId().javaClass()));
    }
    Map<Integer, ColumnFieldDetails> idToDetails = idToDetailsBuilder.buildOrThrow();
    TableScan tableScan = icebergTable.newScan().filter(toIcebergExpression(intersection)).useSnapshot(tableHandle.getSnapshotId().get()).includeColumnStats();
    IcebergStatistics.Builder icebergStatisticsBuilder = new IcebergStatistics.Builder(columns, typeManager);
    try (CloseableIterable<FileScanTask> fileScanTasks = tableScan.planFiles()) {
        for (FileScanTask fileScanTask : fileScanTasks) {
            DataFile dataFile = fileScanTask.file();
            if (!dataFileMatches(dataFile, constraint, partitionFields, idToDetails)) {
                continue;
            }
            icebergStatisticsBuilder.acceptDataFile(dataFile, fileScanTask.spec());
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
    IcebergStatistics summary = icebergStatisticsBuilder.build();
    if (summary.getFileCount() == 0) {
        return TableStatistics.empty();
    }
    ImmutableMap.Builder<ColumnHandle, ColumnStatistics> columnHandleBuilder = ImmutableMap.builder();
    double recordCount = summary.getRecordCount();
    for (IcebergColumnHandle columnHandle : idToColumnHandle.values()) {
        int fieldId = columnHandle.getId();
        ColumnStatistics.Builder columnBuilder = new ColumnStatistics.Builder();
        Long nullCount = summary.getNullCounts().get(fieldId);
        if (nullCount != null) {
            columnBuilder.setNullsFraction(Estimate.of(nullCount / recordCount));
        }
        if (summary.getColumnSizes() != null) {
            Long columnSize = summary.getColumnSizes().get(fieldId);
            if (columnSize != null) {
                columnBuilder.setDataSize(Estimate.of(columnSize));
            }
        }
        Object min = summary.getMinValues().get(fieldId);
        Object max = summary.getMaxValues().get(fieldId);
        if (min != null && max != null) {
            columnBuilder.setRange(DoubleRange.from(columnHandle.getType(), min, max));
        }
        columnHandleBuilder.put(columnHandle, columnBuilder.build());
    }
    return new TableStatistics(Estimate.of(recordCount), columnHandleBuilder.buildOrThrow());
}
Also used : Schema(org.apache.iceberg.Schema) UncheckedIOException(java.io.UncheckedIOException) DataFile(org.apache.iceberg.DataFile) PartitionField(org.apache.iceberg.PartitionField) ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) TableScan(org.apache.iceberg.TableScan) ColumnHandle(io.trino.spi.connector.ColumnHandle) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) ImmutableMap(com.google.common.collect.ImmutableMap) Constraint(io.trino.spi.connector.Constraint) TypeConverter.toTrinoType(io.trino.plugin.iceberg.TypeConverter.toTrinoType) Type(org.apache.iceberg.types.Type) TableStatistics(io.trino.spi.statistics.TableStatistics) FileScanTask(org.apache.iceberg.FileScanTask)

Example 98 with ColumnHandle

use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.

the class KuduMetadata method getTableProperties.

@Override
public ConnectorTableProperties getTableProperties(ConnectorSession session, ConnectorTableHandle table) {
    KuduTableHandle handle = (KuduTableHandle) table;
    KuduTable kuduTable = handle.getTable(clientSession);
    Optional<ConnectorTablePartitioning> tablePartitioning = Optional.empty();
    Optional<Set<ColumnHandle>> partitioningColumns = Optional.empty();
    List<LocalProperty<ColumnHandle>> localProperties = ImmutableList.of();
    if (isKuduGroupedExecutionEnabled(session) && isTableSupportGroupedExecution(kuduTable)) {
        Map<String, ColumnHandle> columnMap = getColumnHandles(session, handle);
        List<Integer> bucketColumnIds = getBucketColumnIds(kuduTable);
        List<ColumnHandle> bucketColumns = getSpecifyColumns(kuduTable.getSchema(), bucketColumnIds, columnMap);
        Optional<List<KuduRangePartition>> kuduRangePartitions = getKuduRangePartitions(kuduTable);
        tablePartitioning = Optional.of(new ConnectorTablePartitioning(new KuduPartitioningHandle(handle.getSchemaTableName().getSchemaName(), handle.getSchemaTableName().getTableName(), handle.getBucketCount().orElse(0), bucketColumnIds, bucketColumns.stream().map(KuduColumnHandle.class::cast).map(KuduColumnHandle::getType).collect(Collectors.toList()), kuduRangePartitions), bucketColumns));
        partitioningColumns = Optional.of(ImmutableSet.copyOf(bucketColumns));
    }
    return new ConnectorTableProperties(handle.getConstraint(), tablePartitioning, partitioningColumns, Optional.empty(), localProperties);
}
Also used : ColumnHandle(io.trino.spi.connector.ColumnHandle) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) KuduTable(org.apache.kudu.client.KuduTable) ConnectorTablePartitioning(io.trino.spi.connector.ConnectorTablePartitioning) LocalProperty(io.trino.spi.connector.LocalProperty) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties)

Example 99 with ColumnHandle

use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.

the class KuduMetadata method getColumnHandles.

@Override
public Map<String, ColumnHandle> getColumnHandles(ConnectorSession session, ConnectorTableHandle connectorTableHandle) {
    KuduTableHandle tableHandle = (KuduTableHandle) connectorTableHandle;
    Schema schema = clientSession.getTableSchema(tableHandle);
    ImmutableMap.Builder<String, ColumnHandle> columnHandles = ImmutableMap.builder();
    for (int ordinal = 0; ordinal < schema.getColumnCount(); ordinal++) {
        ColumnSchema col = schema.getColumnByIndex(ordinal);
        String name = col.getName();
        Type type = TypeHelper.fromKuduColumn(col);
        KuduColumnHandle columnHandle = new KuduColumnHandle(name, ordinal, type);
        columnHandles.put(name, columnHandle);
    }
    return columnHandles.buildOrThrow();
}
Also used : ColumnHandle(io.trino.spi.connector.ColumnHandle) Type(io.trino.spi.type.Type) VarcharType(io.trino.spi.type.VarcharType) VarbinaryType(io.trino.spi.type.VarbinaryType) Schema(org.apache.kudu.Schema) ColumnSchema(org.apache.kudu.ColumnSchema) HashBucketSchema(org.apache.kudu.client.PartitionSchema.HashBucketSchema) ColumnSchema(org.apache.kudu.ColumnSchema) ImmutableMap(com.google.common.collect.ImmutableMap) Constraint(io.trino.spi.connector.Constraint)

Example 100 with ColumnHandle

use of io.trino.spi.connector.ColumnHandle in project trino by trinodb.

the class TestMongoSession method testBuildQueryOr.

@Test
public void testBuildQueryOr() {
    TupleDomain<ColumnHandle> tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of(COL1, Domain.create(ValueSet.ofRanges(lessThan(BIGINT, 100L), greaterThan(BIGINT, 200L)), false)));
    Document query = MongoSession.buildQuery(tupleDomain);
    Document expected = new Document("$or", asList(new Document(COL1.getName(), new Document("$lt", 100L)), new Document(COL1.getName(), new Document("$gt", 200L))));
    assertEquals(query, expected);
}
Also used : ColumnHandle(io.trino.spi.connector.ColumnHandle) Document(org.bson.Document) Test(org.testng.annotations.Test)

Aggregations

ColumnHandle (io.trino.spi.connector.ColumnHandle)268 Test (org.testng.annotations.Test)121 ImmutableList (com.google.common.collect.ImmutableList)106 TupleDomain (io.trino.spi.predicate.TupleDomain)104 ImmutableMap (com.google.common.collect.ImmutableMap)93 ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle)86 Domain (io.trino.spi.predicate.Domain)86 Map (java.util.Map)78 ConnectorSession (io.trino.spi.connector.ConnectorSession)76 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)71 Optional (java.util.Optional)70 List (java.util.List)69 SchemaTableName (io.trino.spi.connector.SchemaTableName)66 Constraint (io.trino.spi.connector.Constraint)61 Objects.requireNonNull (java.util.Objects.requireNonNull)53 ImmutableSet (com.google.common.collect.ImmutableSet)51 NullableValue (io.trino.spi.predicate.NullableValue)50 Type (io.trino.spi.type.Type)48 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)45 ColumnMetadata (io.trino.spi.connector.ColumnMetadata)45