Search in sources :

Example 26 with PartitionField

use of org.apache.iceberg.PartitionField in project iceberg by apache.

the class SortOrderUtil method buildSortOrder.

/**
 * Combines a table's partitioning with its configured sort order into a single sort order.
 *
 * <p>Each partition field that is not already covered by a sort field on the same source
 * column is added first, ascending, in partition-spec order. The fields of {@code sortOrder}
 * are then appended after that prefix.
 *
 * @param schema the schema used to resolve source column names and to build the result
 * @param spec the partition spec whose fields form the sort prefix
 * @param sortOrder the configured sort order to append after the partition prefix
 * @return {@link SortOrder#unsorted()} when the table is both unsorted and unpartitioned,
 *     otherwise the combined sort order
 */
public static SortOrder buildSortOrder(Schema schema, PartitionSpec spec, SortOrder sortOrder) {
    boolean hasOrdering = !sortOrder.isUnsorted() || !spec.isUnpartitioned();
    if (!hasOrdering) {
        return SortOrder.unsorted();
    }

    // group the configured sort fields by the id of the column they read from
    Multimap<Integer, SortField> fieldsBySourceId = Multimaps.index(sortOrder.fields(), SortField::sourceId);

    // prefix: partition fields the configured sort does not already cover
    SortOrder.Builder combined = SortOrder.builderFor(schema);
    for (PartitionField partitionField : spec.fields()) {
        boolean covered = false;
        for (SortField candidate : fieldsBySourceId.get(partitionField.sourceId())) {
            // covered when the transforms are equal or the sort transform satisfies the partition transform's order
            if (partitionField.transform().equals(candidate.transform()) || candidate.transform().satisfiesOrderOf(partitionField.transform())) {
                covered = true;
                break;
            }
        }
        if (covered) {
            continue;
        }
        builderAscending:
        {
            String columnName = schema.findColumnName(partitionField.sourceId());
            combined.asc(Expressions.transform(columnName, partitionField.transform()));
        }
    }

    // suffix: the configured sort fields, copied through the visitor
    SortOrderVisitor.visit(sortOrder, new CopySortOrderFields(combined));
    return combined.build();
}
Also used : PartitionField(org.apache.iceberg.PartitionField) SortOrder(org.apache.iceberg.SortOrder) SortField(org.apache.iceberg.SortField)

Example 27 with PartitionField

use of org.apache.iceberg.PartitionField in project trino by trinodb.

the class PartitionTable method buildRecordCursor.

/**
 * Builds an in-memory record cursor with one row per entry of {@code partitionStatistics}.
 *
 * <p>Each row is assembled in this order: the partition values packed into a single row-typed
 * value (only when {@code partitionColumnType} is present), then record count, file count and
 * size, then the per-column metrics packed into a single row-typed value (only when
 * {@code dataColumnType} is present).
 *
 * <p>NOTE(review): {@code partitionColumnType}, {@code partitionColumnTypes},
 * {@code dataColumnType}, {@code columnMetricTypes}, {@code nonPartitionPrimitiveColumns} and
 * {@code resultTypes} are declared outside this method; their exact types and how they are
 * populated are not visible here.
 *
 * @param partitionStatistics statistics keyed by the partition they were collected for
 * @param partitionFields partition fields used to derive the Iceberg type of each partition value
 * @return a cursor over the assembled rows, typed by {@code resultTypes}
 */
private RecordCursor buildRecordCursor(Map<StructLikeWrapper, IcebergStatistics> partitionStatistics, List<PartitionField> partitionFields) {
    // Iceberg types of the partition values and the Java classes used to read them from the partition struct
    List<Type> partitionTypes = partitionTypes(partitionFields);
    List<? extends Class<?>> partitionColumnClass = partitionTypes.stream().map(type -> type.typeId().javaClass()).collect(toImmutableList());
    ImmutableList.Builder<List<Object>> records = ImmutableList.builder();
    for (Map.Entry<StructLikeWrapper, IcebergStatistics> partitionEntry : partitionStatistics.entrySet()) {
        StructLikeWrapper partitionStruct = partitionEntry.getKey();
        IcebergStatistics icebergStatistics = partitionEntry.getValue();
        List<Object> row = new ArrayList<>();
        // add data for partition columns
        // (the lambda parameter shadows the outer partitionColumnType and is its unwrapped value)
        partitionColumnType.ifPresent(partitionColumnType -> {
            BlockBuilder partitionRowBlockBuilder = partitionColumnType.createBlockBuilder(null, 1);
            BlockBuilder partitionBlockBuilder = partitionRowBlockBuilder.beginBlockEntry();
            for (int i = 0; i < partitionColumnTypes.size(); i++) {
                io.trino.spi.type.Type trinoType = partitionColumnType.getFields().get(i).getType();
                // read the i-th partition value as its Iceberg Java class, then convert it to the Trino representation
                Object value = convertIcebergValueToTrino(partitionTypes.get(i), partitionStruct.get().get(i, partitionColumnClass.get(i)));
                writeNativeValue(trinoType, partitionBlockBuilder, value);
            }
            partitionRowBlockBuilder.closeEntry();
            // the builder holds exactly one entry; extract it as the partition column value
            row.add(partitionColumnType.getObject(partitionRowBlockBuilder, 0));
        });
        // add the top level metrics.
        row.add(icebergStatistics.getRecordCount());
        row.add(icebergStatistics.getFileCount());
        row.add(icebergStatistics.getSize());
        // add column level metrics
        dataColumnType.ifPresent(dataColumnType -> {
            BlockBuilder dataRowBlockBuilder = dataColumnType.createBlockBuilder(null, 1);
            BlockBuilder dataBlockBuilder = dataRowBlockBuilder.beginBlockEntry();
            for (int i = 0; i < columnMetricTypes.size(); i++) {
                Integer fieldId = nonPartitionPrimitiveColumns.get(i).fieldId();
                Object min = icebergStatistics.getMinValues().get(fieldId);
                Object max = icebergStatistics.getMaxValues().get(fieldId);
                Long nullCount = icebergStatistics.getNullCounts().get(fieldId);
                if (min == null && max == null && nullCount == null) {
                    // No metrics at all for this column: the whole data column of this row becomes null.
                    // The return exits only this lambda (not the enclosing for-loop over partitions);
                    // the partially written block entry is simply abandoned.
                    row.add(null);
                    return;
                }
                RowType columnMetricType = columnMetricTypes.get(i);
                columnMetricType.writeObject(dataBlockBuilder, getColumnMetricBlock(columnMetricType, min, max, nullCount));
            }
            dataRowBlockBuilder.closeEntry();
            // the builder holds exactly one entry; extract it as the data column value
            row.add(dataColumnType.getObject(dataRowBlockBuilder, 0));
        });
        records.add(row);
    }
    return new InMemoryRecordSet(resultTypes, records.build()).cursor();
}
Also used : Types(org.apache.iceberg.types.Types) TypeConverter.toTrinoType(io.trino.plugin.iceberg.TypeConverter.toTrinoType) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) HashMap(java.util.HashMap) StructLike(org.apache.iceberg.StructLike) PartitionField(org.apache.iceberg.PartitionField) ArrayList(java.util.ArrayList) IcebergTypes.convertIcebergValueToTrino(io.trino.plugin.iceberg.IcebergTypes.convertIcebergValueToTrino) ImmutableList(com.google.common.collect.ImmutableList) Block(io.trino.spi.block.Block) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) FileScanTask(org.apache.iceberg.FileScanTask) DataFile(org.apache.iceberg.DataFile) Collectors.toSet(java.util.stream.Collectors.toSet) RowType(io.trino.spi.type.RowType) RecordCursor(io.trino.spi.connector.RecordCursor) CloseableIterable(org.apache.iceberg.io.CloseableIterable) Table(org.apache.iceberg.Table) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TableScan(org.apache.iceberg.TableScan) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) TypeUtils.writeNativeValue(io.trino.spi.type.TypeUtils.writeNativeValue) TupleDomain(io.trino.spi.predicate.TupleDomain) Schema(org.apache.iceberg.Schema) InMemoryRecordSet(io.trino.spi.connector.InMemoryRecordSet) SchemaTableName(io.trino.spi.connector.SchemaTableName) Type(org.apache.iceberg.types.Type) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) IcebergUtil.getIdentityPartitions(io.trino.plugin.iceberg.IcebergUtil.getIdentityPartitions) Stream(java.util.stream.Stream) BIGINT(io.trino.spi.type.BigintType.BIGINT) Optional(java.util.Optional) IcebergUtil.primitiveFieldTypes(io.trino.plugin.iceberg.IcebergUtil.primitiveFieldTypes) 
StructLikeWrapper(org.apache.iceberg.util.StructLikeWrapper) BlockBuilder(io.trino.spi.block.BlockBuilder) TypeManager(io.trino.spi.type.TypeManager) SystemTable(io.trino.spi.connector.SystemTable) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) RowType(io.trino.spi.type.RowType) InMemoryRecordSet(io.trino.spi.connector.InMemoryRecordSet) TypeConverter.toTrinoType(io.trino.plugin.iceberg.TypeConverter.toTrinoType) RowType(io.trino.spi.type.RowType) Type(org.apache.iceberg.types.Type) StructLikeWrapper(org.apache.iceberg.util.StructLikeWrapper) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) BlockBuilder(io.trino.spi.block.BlockBuilder)

Example 28 with PartitionField

use of org.apache.iceberg.PartitionField in project trino by trinodb.

the class TableStatisticsMaker method dataFileMatches.

/**
 * Tests whether a data file's partition values satisfy the summary of {@code constraint}.
 *
 * <p>For each partition field, the file's partition value at the same position is converted
 * to its Trino representation and checked against the domain the constraint allows for the
 * corresponding column. Columns the constraint does not mention are unrestricted.
 *
 * @param dataFile the data file whose partition tuple is inspected
 * @param constraint the constraint to test against; it must not carry a predicate
 * @param partitionFields the partition fields, in the same order as the file's partition tuple
 * @param fieldDetails per-field column handle, Iceberg type and Java class, looked up by
 *     {@link PartitionField#fieldId()}
 * @return {@code false} if the constraint summary is "none" or any partition value falls
 *     outside its allowed domain, {@code true} otherwise
 */
private boolean dataFileMatches(DataFile dataFile, Constraint constraint, List<PartitionField> partitionFields, Map<Integer, ColumnFieldDetails> fieldDetails) {
    // Currently this method is used only for IcebergMetadata.getTableStatistics and there Constraint never carries a predicate.
    // TODO support pruning with constraint when this changes.
    verify(constraint.predicate().isEmpty(), "Unexpected Constraint predicate");
    TupleDomain<ColumnHandle> constraintSummary = constraint.getSummary();
    if (constraintSummary.isNone()) {
        // A "none" tuple domain admits no values and exposes no domain map,
        // so no data file can match it. Without this guard the get() below would throw.
        return false;
    }
    // safe: getDomains() is only empty for a "none" tuple domain, which was handled above
    Map<ColumnHandle, Domain> domains = constraintSummary.getDomains().get();
    for (int index = 0; index < partitionFields.size(); index++) {
        PartitionField field = partitionFields.get(index);
        int fieldId = field.fieldId();
        ColumnFieldDetails details = fieldDetails.get(fieldId);
        IcebergColumnHandle column = details.getColumnHandle();
        Object value = convertIcebergValueToTrino(details.getIcebergType(), dataFile.partition().get(index, details.getJavaClass()));
        Domain allowedDomain = domains.get(column);
        // a column absent from the domain map is unconstrained
        if (allowedDomain != null && !allowedDomain.includesNullableValue(value)) {
            return false;
        }
    }
    return true;
}
Also used : ColumnHandle(io.trino.spi.connector.ColumnHandle) PartitionField(org.apache.iceberg.PartitionField) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain) Constraint(io.trino.spi.connector.Constraint)

Example 29 with PartitionField

use of org.apache.iceberg.PartitionField in project trino by trinodb.

the class ManifestsTable method writePartitionSummaries.

/**
 * Appends one array entry to {@code arrayBlockBuilder} holding a row per partition field summary.
 *
 * <p>Each row is written in this order: contains-null flag, contains-NaN flag, lower bound and
 * upper bound. The bounds are decoded with the partition field's result type and rendered
 * through the field transform's human-readable form.
 *
 * <p>The contains-NaN flag is a nullable {@code Boolean} in Iceberg; when a manifest does not
 * record it, a SQL NULL is written instead of unboxing the value.
 *
 * @param arrayBlockBuilder the builder that receives the array of summary rows
 * @param summaries the partition field summaries, positionally aligned with the spec's fields
 * @param partitionSpec the partition spec supplying each field's transform and result type
 */
private static void writePartitionSummaries(BlockBuilder arrayBlockBuilder, List<PartitionFieldSummary> summaries, PartitionSpec partitionSpec) {
    BlockBuilder singleArrayWriter = arrayBlockBuilder.beginBlockEntry();
    for (int i = 0; i < summaries.size(); i++) {
        PartitionFieldSummary summary = summaries.get(i);
        PartitionField field = partitionSpec.fields().get(i);
        Type nestedType = partitionSpec.partitionType().fields().get(i).type();
        BlockBuilder rowBuilder = singleArrayWriter.beginBlockEntry();
        BOOLEAN.writeBoolean(rowBuilder, summary.containsNull());
        // containsNaN() may be null when the manifest carries no NaN information; avoid the unboxing NPE
        Boolean containsNan = summary.containsNaN();
        if (containsNan == null) {
            rowBuilder.appendNull();
        } else {
            BOOLEAN.writeBoolean(rowBuilder, containsNan);
        }
        VARCHAR.writeString(rowBuilder, field.transform().toHumanString(Conversions.fromByteBuffer(nestedType, summary.lowerBound())));
        VARCHAR.writeString(rowBuilder, field.transform().toHumanString(Conversions.fromByteBuffer(nestedType, summary.upperBound())));
        singleArrayWriter.closeEntry();
    }
    arrayBlockBuilder.closeEntry();
}
Also used : PartitionField(org.apache.iceberg.PartitionField) RowType(io.trino.spi.type.RowType) ArrayType(io.trino.spi.type.ArrayType) Type(org.apache.iceberg.types.Type) PartitionFieldSummary(org.apache.iceberg.ManifestFile.PartitionFieldSummary) BlockBuilder(io.trino.spi.block.BlockBuilder)

Example 30 with PartitionField

use of org.apache.iceberg.PartitionField in project trino by trinodb.

the class TableStatisticsMaker method partitionTypes.

/**
 * Computes the result type of each partition field's transform.
 *
 * @param partitionFields the partition fields to resolve, in output order
 * @param idToTypeMapping source column id to primitive source type
 * @return an immutable list holding, for each partition field in order, the type produced by
 *     applying its transform to the mapped source type
 */
public List<Type> partitionTypes(List<PartitionField> partitionFields, Map<Integer, Type.PrimitiveType> idToTypeMapping) {
    ImmutableList.Builder<Type> transformedTypes = ImmutableList.builder();
    // look up each field's source type and let the transform report what it produces
    partitionFields.forEach(field ->
            transformedTypes.add(field.transform().getResultType(idToTypeMapping.get(field.sourceId()))));
    return transformedTypes.build();
}
Also used : TypeConverter.toTrinoType(io.trino.plugin.iceberg.TypeConverter.toTrinoType) Type(org.apache.iceberg.types.Type) PartitionField(org.apache.iceberg.PartitionField) ImmutableList(com.google.common.collect.ImmutableList)

Aggregations

PartitionField (org.apache.iceberg.PartitionField)30 Type (org.apache.iceberg.types.Type)18 ImmutableList (com.google.common.collect.ImmutableList)13 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)11 IOException (java.io.IOException)11 UncheckedIOException (java.io.UncheckedIOException)9 List (java.util.List)9 PartitionSpec (org.apache.iceberg.PartitionSpec)9 TypeConverter.toPrestoType (com.facebook.presto.iceberg.TypeConverter.toPrestoType)8 Map (java.util.Map)8 DataFile (org.apache.iceberg.DataFile)8 FileScanTask (org.apache.iceberg.FileScanTask)8 Schema (org.apache.iceberg.Schema)8 Table (org.apache.iceberg.Table)8 TableScan (org.apache.iceberg.TableScan)8 Set (java.util.Set)7 Collectors (java.util.stream.Collectors)7 CloseableIterable (org.apache.iceberg.io.CloseableIterable)7 RowType (com.facebook.presto.common.type.RowType)6 HashMap (java.util.HashMap)6