use of org.apache.iceberg.PartitionField in project iceberg by apache.
the class SortOrderUtil method buildSortOrder.
/**
 * Combines a partition spec and a configured sort order into a single sort order.
 *
 * <p>Partition fields that the configured order does not already cover are emitted first, in
 * ascending direction, followed by a copy of the configured sort fields.
 *
 * @param schema schema used to resolve column names and to create the builder
 * @param spec partition spec whose fields form the sort prefix
 * @param sortOrder configured sort order appended after the partition prefix
 * @return the unsorted order when the table is both unsorted and unpartitioned, otherwise the
 *     combined order
 */
public static SortOrder buildSortOrder(Schema schema, PartitionSpec spec, SortOrder sortOrder) {
    boolean nothingToOrderBy = sortOrder.isUnsorted() && spec.isUnpartitioned();
    if (nothingToOrderBy) {
        return SortOrder.unsorted();
    }

    // group the configured sort fields by the id of the column they are derived from
    Multimap<Integer, SortField> sortFieldsBySourceId = Multimaps.index(sortOrder.fields(), SortField::sourceId);
    SortOrder.Builder orderBuilder = SortOrder.builderFor(schema);

    // prefix: every partition field that the configured order does not cover yet
    for (PartitionField partitionField : spec.fields()) {
        boolean alreadyCovered = false;
        for (SortField candidate : sortFieldsBySourceId.get(partitionField.sourceId())) {
            // covered when the transforms are equal or the sort transform satisfies the partition transform's order
            if (partitionField.transform().equals(candidate.transform())
                    || candidate.transform().satisfiesOrderOf(partitionField.transform())) {
                alreadyCovered = true;
                break;
            }
        }
        if (alreadyCovered) {
            continue;
        }
        String columnName = schema.findColumnName(partitionField.sourceId());
        orderBuilder.asc(Expressions.transform(columnName, partitionField.transform()));
    }

    // suffix: the configured sort fields, copied into the same builder
    SortOrderVisitor.visit(sortOrder, new CopySortOrderFields(orderBuilder));
    return orderBuilder.build();
}
use of org.apache.iceberg.PartitionField in project trino by trinodb.
the class PartitionTable method buildRecordCursor.
/**
 * Builds an in-memory record cursor with one row per entry of {@code partitionStatistics}.
 *
 * <p>Each row is assembled in this order: the partition value (only when
 * {@code partitionColumnType} is present), the record count, the file count, the size, and the
 * per-column metrics (only when {@code dataColumnType} is present).
 *
 * <p>NOTE(review): {@code partitionColumnType}, {@code partitionColumnTypes},
 * {@code dataColumnType}, {@code columnMetricTypes}, {@code nonPartitionPrimitiveColumns} and
 * {@code resultTypes} are not declared in this method; presumably they are fields of the
 * enclosing class - confirm.
 *
 * @param partitionStatistics statistics keyed by the wrapped partition struct
 * @param partitionFields partition fields used to derive the Iceberg partition types
 * @return a cursor over the rows built from {@code partitionStatistics}
 */
private RecordCursor buildRecordCursor(Map<StructLikeWrapper, IcebergStatistics> partitionStatistics, List<PartitionField> partitionFields) {
List<Type> partitionTypes = partitionTypes(partitionFields);
// Java class used to read each partition value out of the partition struct
List<? extends Class<?>> partitionColumnClass = partitionTypes.stream().map(type -> type.typeId().javaClass()).collect(toImmutableList());
ImmutableList.Builder<List<Object>> records = ImmutableList.builder();
for (Map.Entry<StructLikeWrapper, IcebergStatistics> partitionEntry : partitionStatistics.entrySet()) {
StructLikeWrapper partitionStruct = partitionEntry.getKey();
IcebergStatistics icebergStatistics = partitionEntry.getValue();
List<Object> row = new ArrayList<>();
// add data for partition columns
// (the lambda parameter below reuses the name of the outer partitionColumnType it unwraps)
partitionColumnType.ifPresent(partitionColumnType -> {
BlockBuilder partitionRowBlockBuilder = partitionColumnType.createBlockBuilder(null, 1);
BlockBuilder partitionBlockBuilder = partitionRowBlockBuilder.beginBlockEntry();
for (int i = 0; i < partitionColumnTypes.size(); i++) {
io.trino.spi.type.Type trinoType = partitionColumnType.getFields().get(i).getType();
// read the i-th partition value with its Java class, then convert it to the Trino representation
Object value = convertIcebergValueToTrino(partitionTypes.get(i), partitionStruct.get().get(i, partitionColumnClass.get(i)));
writeNativeValue(trinoType, partitionBlockBuilder, value);
}
partitionRowBlockBuilder.closeEntry();
// the builder holds exactly one entry; position 0 is the partition value just written
row.add(partitionColumnType.getObject(partitionRowBlockBuilder, 0));
});
// add the top level metrics.
row.add(icebergStatistics.getRecordCount());
row.add(icebergStatistics.getFileCount());
row.add(icebergStatistics.getSize());
// add column level metrics
dataColumnType.ifPresent(dataColumnType -> {
BlockBuilder dataRowBlockBuilder = dataColumnType.createBlockBuilder(null, 1);
BlockBuilder dataBlockBuilder = dataRowBlockBuilder.beginBlockEntry();
for (int i = 0; i < columnMetricTypes.size(); i++) {
Integer fieldId = nonPartitionPrimitiveColumns.get(i).fieldId();
Object min = icebergStatistics.getMinValues().get(fieldId);
Object max = icebergStatistics.getMaxValues().get(fieldId);
Long nullCount = icebergStatistics.getNullCounts().get(fieldId);
// A column without min, max and null count makes the whole metrics value null for this row:
// null is added in its place and the return leaves the lambda only, so the remaining columns
// are skipped while the row itself is still added to the records below.
if (min == null && max == null && nullCount == null) {
row.add(null);
return;
}
RowType columnMetricType = columnMetricTypes.get(i);
columnMetricType.writeObject(dataBlockBuilder, getColumnMetricBlock(columnMetricType, min, max, nullCount));
}
dataRowBlockBuilder.closeEntry();
// the builder holds exactly one entry; position 0 is the metrics value just written
row.add(dataColumnType.getObject(dataRowBlockBuilder, 0));
});
records.add(row);
}
return new InMemoryRecordSet(resultTypes, records.build()).cursor();
}
use of org.apache.iceberg.PartitionField in project trino by trinodb.
the class TableStatisticsMaker method dataFileMatches.
/**
 * Checks whether the partition values of a data file are allowed by the summary of a constraint.
 *
 * <p>For every partition field, the partition value stored in the data file is converted to its
 * Trino representation and tested against the domain the constraint summary holds for the
 * corresponding column. Columns without a domain in the summary are not restricted.
 *
 * @param dataFile data file whose partition tuple is inspected
 * @param constraint constraint to test against; it must not carry a predicate
 * @param partitionFields partition fields, in the positional order of the file's partition tuple
 * @param fieldDetails column handle, Iceberg type and Java class per partition field id
 * @return {@code false} if the constraint summary is "none" or any partition value falls outside
 *     its allowed domain, {@code true} otherwise
 */
private boolean dataFileMatches(DataFile dataFile, Constraint constraint, List<PartitionField> partitionFields, Map<Integer, ColumnFieldDetails> fieldDetails) {
    // Currently this method is used only for IcebergMetadata.getTableStatistics and there Constraint never carries a predicate.
    // TODO support pruning with constraint when this changes.
    verify(constraint.predicate().isEmpty(), "Unexpected Constraint predicate");

    TupleDomain<ColumnHandle> constraintSummary = constraint.getSummary();
    if (constraintSummary.isNone()) {
        // A "none" summary has no domain map, so getDomains().get() would throw.
        // Nothing satisfies such a constraint, hence no data file matches.
        return false;
    }
    Map<ColumnHandle, Domain> domains = constraintSummary.getDomains().get();

    for (int index = 0; index < partitionFields.size(); index++) {
        PartitionField field = partitionFields.get(index);
        int fieldId = field.fieldId();
        ColumnFieldDetails details = fieldDetails.get(fieldId);
        IcebergColumnHandle column = details.getColumnHandle();
        // the partition tuple is positional: index selects the value belonging to this partition field
        Object value = convertIcebergValueToTrino(details.getIcebergType(), dataFile.partition().get(index, details.getJavaClass()));
        Domain allowedDomain = domains.get(column);
        if (allowedDomain != null && !allowedDomain.includesNullableValue(value)) {
            return false;
        }
    }
    return true;
}
use of org.apache.iceberg.PartitionField in project trino by trinodb.
the class ManifestsTable method writePartitionSummaries.
/**
 * Writes the partition field summaries of a manifest as one array entry of rows.
 *
 * <p>For each summary a row is written with four values, in this order: contains-null flag,
 * contains-NaN flag, lower bound and upper bound. Both bounds are rendered as human-readable
 * strings through the transform of the matching partition field.
 *
 * @param arrayBlockBuilder builder receiving the array entry
 * @param summaries per-field summaries, positionally aligned with the fields of {@code partitionSpec}
 * @param partitionSpec partition spec supplying the transform and result type of each field
 */
private static void writePartitionSummaries(BlockBuilder arrayBlockBuilder, List<PartitionFieldSummary> summaries, PartitionSpec partitionSpec) {
    BlockBuilder singleArrayWriter = arrayBlockBuilder.beginBlockEntry();
    for (int i = 0; i < summaries.size(); i++) {
        PartitionFieldSummary summary = summaries.get(i);
        PartitionField field = partitionSpec.fields().get(i);
        Type nestedType = partitionSpec.partitionType().fields().get(i).type();

        BlockBuilder rowBuilder = singleArrayWriter.beginBlockEntry();
        BOOLEAN.writeBoolean(rowBuilder, summary.containsNull());

        // containsNaN() is a nullable Boolean: null means the manifest does not record NaN information.
        // Passing null to writeBoolean would fail on unboxing, so write a SQL null instead.
        Boolean containsNaN = summary.containsNaN();
        if (containsNaN == null) {
            rowBuilder.appendNull();
        } else {
            BOOLEAN.writeBoolean(rowBuilder, containsNaN);
        }

        VARCHAR.writeString(rowBuilder, field.transform().toHumanString(Conversions.fromByteBuffer(nestedType, summary.lowerBound())));
        VARCHAR.writeString(rowBuilder, field.transform().toHumanString(Conversions.fromByteBuffer(nestedType, summary.upperBound())));
        singleArrayWriter.closeEntry();
    }
    arrayBlockBuilder.closeEntry();
}
use of org.apache.iceberg.PartitionField in project trino by trinodb.
the class TableStatisticsMaker method partitionTypes.
/**
 * Computes the Iceberg result type of every partition field.
 *
 * <p>The source column type of each field is looked up by its source id and passed through the
 * field's transform to obtain the type of the partition value.
 *
 * @param partitionFields partition fields to resolve, in order
 * @param idToTypeMapping primitive source types keyed by field id
 * @return the transform result types, in the order of {@code partitionFields}
 */
public List<Type> partitionTypes(List<PartitionField> partitionFields, Map<Integer, Type.PrimitiveType> idToTypeMapping) {
    ImmutableList.Builder<Type> resolvedTypes = ImmutableList.builder();
    partitionFields.forEach(field ->
            resolvedTypes.add(field.transform().getResultType(idToTypeMapping.get(field.sourceId()))));
    return resolvedTypes.build();
}
Aggregations