Search in sources :

Example 11 with Type

use of org.apache.iceberg.types.Type in project hive by apache.

the class TestHiveIcebergInserts method testInsertSupportedTypes.

/**
 * Creates one table per entry of {@code SUPPORTED_TYPES}, populated with five randomly generated
 * records, and validates that the table contents match those records.
 *
 * <p>Each table has a required {@code id} long column plus one required column of the type under
 * test. UUID is skipped for Parquet, and binary/fixed types are skipped entirely (see the TODOs).
 *
 * @throws IOException declared by the original test signature; propagated from the helpers it calls
 */
@Test
public void testInsertSupportedTypes() throws IOException {
    for (int i = 0; i < SUPPORTED_TYPES.size(); i++) {
        Type type = SUPPORTED_TYPES.get(i);
        // TODO: remove this filter when issue #1881 is resolved
        if (type.equals(Types.UUIDType.get()) && fileFormat == FileFormat.PARQUET) {
            continue;
        }
        // TODO: remove this filter when we figure out how we could test binary types
        if (type.equals(Types.BinaryType.get()) || type.equals(Types.FixedType.ofLength(5))) {
            continue;
        }
        // Lower-case with Locale.ROOT so the generated identifiers do not depend on the JVM's
        // default locale (e.g. a Turkish locale maps 'I' to a dotless 'ı').
        String typeName = type.typeId().toString().toLowerCase(java.util.Locale.ROOT);
        String columnName = typeName + "_column";
        Schema schema = new Schema(required(1, "id", Types.LongType.get()), required(2, columnName, type));
        List<Record> expected = TestHelper.generateRandomRecords(schema, 5, 0L);
        // The loop index keeps table names unique across the supported types.
        Table table = testTables.createTable(shell, typeName + "_table_" + i, schema, PartitionSpec.unpartitioned(), fileFormat, expected);
        HiveIcebergTestUtils.validateData(table, expected, 0);
    }
}
Also used : Type(org.apache.iceberg.types.Type) Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) Record(org.apache.iceberg.data.Record) Test(org.junit.Test)

Example 12 with Type

use of org.apache.iceberg.types.Type in project presto by prestodb.

the class PartitionTable method buildRecordCursor.

/**
 * Builds an in-memory record cursor containing one row per entry of {@code partitions}.
 *
 * <p>Each row is laid out as: the converted partition values, then record count, file count and
 * size, then one entry per column-metric type (a metric block, or {@code null} when the partition
 * reports no valid column metrics).
 *
 * @param partitions the partitions to emit, keyed by their wrapped partition struct
 * @param partitionFields the partition fields used to derive the partition value types
 * @return a cursor over the generated rows, typed by {@code resultTypes}
 */
private RecordCursor buildRecordCursor(Map<StructLikeWrapper, Partition> partitions, List<PartitionField> partitionFields) {
    List<Type> icebergPartitionTypes = partitionTypes(partitionFields);
    List<? extends Class<?>> partitionJavaClasses = icebergPartitionTypes.stream()
            .map(type -> type.typeId().javaClass())
            .collect(toImmutableList());

    int partitionColumnCount = partitionColumnTypes.size();
    int metricColumnCount = columnMetricTypes.size();
    // partition columns + (record count, file count, size) + per-column metrics
    int rowWidth = partitionColumnCount + 3 + metricColumnCount;

    ImmutableList.Builder<List<Object>> rows = ImmutableList.builder();
    for (Partition current : partitions.values()) {
        List<Object> cells = new ArrayList<>(rowWidth);

        // partition column values
        for (int column = 0; column < partitionColumnCount; column++) {
            Object rawValue = current.getValues().get(column, partitionJavaClasses.get(column));
            cells.add(convert(rawValue, icebergPartitionTypes.get(column)));
        }

        // top level metrics
        cells.add(current.getRecordCount());
        cells.add(current.getFileCount());
        cells.add(current.getSize());

        // column level metrics
        for (int metric = 0; metric < metricColumnCount; metric++) {
            if (current.hasValidColumnMetrics()) {
                Integer fieldId = nonPartitionPrimitiveColumns.get(metric).fieldId();
                Type.PrimitiveType fieldType = idToTypeMapping.get(fieldId);
                Object lower = convert(current.getMinValues().get(fieldId), fieldType);
                Object upper = convert(current.getMaxValues().get(fieldId), fieldType);
                Long nulls = current.getNullCounts().get(fieldId);
                cells.add(getColumnMetricBlock(columnMetricTypes.get(metric), lower, upper, nulls));
            } else {
                // keep the row width stable when metrics are unavailable
                cells.add(null);
            }
        }
        rows.add(cells);
    }

    InMemoryRecordSet recordSet = new InMemoryRecordSet(resultTypes, rows.build());
    return recordSet.cursor();
}
Also used : Types(org.apache.iceberg.types.Types) HashMap(java.util.HashMap) StructLike(org.apache.iceberg.StructLike) InMemoryRecordSet(com.facebook.presto.spi.InMemoryRecordSet) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) ByteBuffer(java.nio.ByteBuffer) PartitionField(org.apache.iceberg.PartitionField) ArrayList(java.util.ArrayList) SchemaTableName(com.facebook.presto.spi.SchemaTableName) ImmutableList(com.google.common.collect.ImmutableList) TypeManager(com.facebook.presto.common.type.TypeManager) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) IcebergUtil.getIdentityPartitions(com.facebook.presto.iceberg.IcebergUtil.getIdentityPartitions) SystemTable(com.facebook.presto.spi.SystemTable) FileScanTask(org.apache.iceberg.FileScanTask) DataFile(org.apache.iceberg.DataFile) Collectors.toSet(java.util.stream.Collectors.toSet) TypeConverter.toPrestoType(com.facebook.presto.iceberg.TypeConverter.toPrestoType) ConnectorTableMetadata(com.facebook.presto.spi.ConnectorTableMetadata) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) BlockBuilder(com.facebook.presto.common.block.BlockBuilder) CloseableIterable(org.apache.iceberg.io.CloseableIterable) Table(org.apache.iceberg.Table) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) TypeUtils(com.facebook.presto.common.type.TypeUtils) Set(java.util.Set) TableScan(org.apache.iceberg.TableScan) IOException(java.io.IOException) Schema(org.apache.iceberg.Schema) Collectors(java.util.stream.Collectors) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) ConnectorSession(com.facebook.presto.spi.ConnectorSession) Type(org.apache.iceberg.types.Type) UncheckedIOException(java.io.UncheckedIOException) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) ColumnMetadata(com.facebook.presto.spi.ColumnMetadata) Optional(java.util.Optional) 
StructLikeWrapper(org.apache.iceberg.util.StructLikeWrapper) Block(com.facebook.presto.common.block.Block) RowType(com.facebook.presto.common.type.RowType) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) InMemoryRecordSet(com.facebook.presto.spi.InMemoryRecordSet) TypeConverter.toPrestoType(com.facebook.presto.iceberg.TypeConverter.toPrestoType) Type(org.apache.iceberg.types.Type) RowType(com.facebook.presto.common.type.RowType) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List)

Example 13 with Type

use of org.apache.iceberg.types.Type in project presto by prestodb.

the class TableStatisticsMaker method makeTableStatistics.

/**
 * Computes table statistics for the snapshot referenced by {@code tableHandle}, restricted to the
 * data files that match {@code constraint}.
 *
 * <p>All matching data files are folded into a single {@code Partition} summary, from which the
 * row count, total size and per-column statistics (nulls fraction, data size, numeric range) are
 * derived. Returns {@link TableStatistics#empty()} when the handle has no snapshot id, when the
 * constraint (alone or intersected with the handle's predicate) is "none", or when no data file
 * matches.
 *
 * @param tableHandle the table handle supplying the snapshot id and predicate
 * @param constraint the constraint whose summary narrows the scan
 * @return the aggregated statistics, or empty statistics as described above
 * @throws UncheckedIOException if closing/iterating the planned file scan raises an {@link IOException}
 */
private TableStatistics makeTableStatistics(IcebergTableHandle tableHandle, Constraint constraint) {
    // Nothing to report without a snapshot or with an unsatisfiable constraint.
    if (!tableHandle.getSnapshotId().isPresent() || constraint.getSummary().isNone()) {
        return TableStatistics.empty();
    }
    // Combine the caller's constraint with the predicate already carried by the handle.
    TupleDomain<IcebergColumnHandle> intersection = constraint.getSummary().transform(IcebergColumnHandle.class::cast).intersect(tableHandle.getPredicate());
    if (intersection.isNone()) {
        return TableStatistics.empty();
    }
    List<Types.NestedField> columns = icebergTable.schema().columns();
    // Field id -> primitive type, for top-level primitive columns only.
    Map<Integer, Type.PrimitiveType> idToTypeMapping = columns.stream().filter(column -> column.type().isPrimitiveType()).collect(Collectors.toMap(Types.NestedField::fieldId, column -> column.type().asPrimitiveType()));
    List<PartitionField> partitionFields = icebergTable.spec().fields();
    // Source column ids of the identity partition fields.
    Set<Integer> identityPartitionIds = getIdentityPartitions(icebergTable.spec()).keySet().stream().map(PartitionField::sourceId).collect(toSet());
    // Primitive columns that are not identity-partitioned.
    List<Types.NestedField> nonPartitionPrimitiveColumns = columns.stream().filter(column -> !identityPartitionIds.contains(column.fieldId()) && column.type().isPrimitiveType()).collect(toImmutableList());
    List<Type> icebergPartitionTypes = partitionTypes(partitionFields, idToTypeMapping);
    List<IcebergColumnHandle> columnHandles = getColumns(icebergTable.schema(), typeManager);
    Map<Integer, IcebergColumnHandle> idToColumnHandle = columnHandles.stream().collect(toImmutableMap(IcebergColumnHandle::getId, identity()));
    // Per partition field: its column handle plus Iceberg type, Presto type and Java class,
    // keyed by the field's source column id.
    ImmutableMap.Builder<Integer, ColumnFieldDetails> idToDetailsBuilder = ImmutableMap.builder();
    for (int index = 0; index < partitionFields.size(); index++) {
        PartitionField field = partitionFields.get(index);
        Type type = icebergPartitionTypes.get(index);
        idToDetailsBuilder.put(field.sourceId(), new ColumnFieldDetails(field, idToColumnHandle.get(field.sourceId()), type, toPrestoType(type, typeManager), type.typeId().javaClass()));
    }
    Map<Integer, ColumnFieldDetails> idToDetails = idToDetailsBuilder.build();
    // Plan the scan against the handle's snapshot, requesting column stats on each data file.
    TableScan tableScan = icebergTable.newScan().filter(toIcebergExpression(intersection)).useSnapshot(tableHandle.getSnapshotId().get()).includeColumnStats();
    // Running aggregate over all matching files; stays null until the first match.
    Partition summary = null;
    try (CloseableIterable<FileScanTask> fileScanTasks = tableScan.planFiles()) {
        for (FileScanTask fileScanTask : fileScanTasks) {
            DataFile dataFile = fileScanTask.file();
            if (!dataFileMatches(dataFile, constraint, idToTypeMapping, partitionFields, idToDetails)) {
                continue;
            }
            if (summary == null) {
                // First matching file seeds the summary with its own metrics.
                summary = new Partition(idToTypeMapping, nonPartitionPrimitiveColumns, dataFile.partition(), dataFile.recordCount(), dataFile.fileSizeInBytes(), toMap(idToTypeMapping, dataFile.lowerBounds()), toMap(idToTypeMapping, dataFile.upperBounds()), dataFile.nullValueCounts(), dataFile.columnSizes());
            } else {
                // Subsequent files are merged into the existing summary.
                summary.incrementFileCount();
                summary.incrementRecordCount(dataFile.recordCount());
                summary.incrementSize(dataFile.fileSizeInBytes());
                updateSummaryMin(summary, partitionFields, toMap(idToTypeMapping, dataFile.lowerBounds()), dataFile.nullValueCounts(), dataFile.recordCount());
                updateSummaryMax(summary, partitionFields, toMap(idToTypeMapping, dataFile.upperBounds()), dataFile.nullValueCounts(), dataFile.recordCount());
                summary.updateNullCount(dataFile.nullValueCounts());
                updateColumnSizes(summary, dataFile.columnSizes());
            }
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
    // No file matched the constraint.
    if (summary == null) {
        return TableStatistics.empty();
    }
    // Held as a double so the nulls-fraction division below is floating point.
    double recordCount = summary.getRecordCount();
    TableStatistics.Builder result = TableStatistics.builder();
    result.setRowCount(Estimate.of(recordCount));
    result.setTotalSize(Estimate.of(summary.getSize()));
    for (IcebergColumnHandle columnHandle : idToColumnHandle.values()) {
        int fieldId = columnHandle.getId();
        ColumnStatistics.Builder columnBuilder = new ColumnStatistics.Builder();
        Long nullCount = summary.getNullCounts().get(fieldId);
        if (nullCount != null) {
            // NOTE(review): if recordCount can be 0 this yields NaN/Infinity — confirm Estimate.of accepts that.
            columnBuilder.setNullsFraction(Estimate.of(nullCount / recordCount));
        }
        if (summary.getColumnSizes() != null) {
            Long columnSize = summary.getColumnSizes().get(fieldId);
            if (columnSize != null) {
                columnBuilder.setDataSize(Estimate.of(columnSize));
            }
        }
        Object min = summary.getMinValues().get(fieldId);
        Object max = summary.getMaxValues().get(fieldId);
        // A range is only reported when both bounds are numeric.
        if (min instanceof Number && max instanceof Number) {
            columnBuilder.setRange(Optional.of(new DoubleRange(((Number) min).doubleValue(), ((Number) max).doubleValue())));
        }
        result.setColumnStatistics(columnHandle, columnBuilder.build());
    }
    return result.build();
}
Also used : Types(org.apache.iceberg.types.Types) ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) PartitionField(org.apache.iceberg.PartitionField) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) ImmutableList(com.google.common.collect.ImmutableList) Partition.toMap(com.facebook.presto.iceberg.Partition.toMap) TypeManager(com.facebook.presto.common.type.TypeManager) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) IcebergUtil.getIdentityPartitions(com.facebook.presto.iceberg.IcebergUtil.getIdentityPartitions) FileScanTask(org.apache.iceberg.FileScanTask) DataFile(org.apache.iceberg.DataFile) ExpressionConverter.toIcebergExpression(com.facebook.presto.iceberg.ExpressionConverter.toIcebergExpression) IcebergUtil.getColumns(com.facebook.presto.iceberg.IcebergUtil.getColumns) Collectors.toSet(java.util.stream.Collectors.toSet) Comparators(org.apache.iceberg.types.Comparators) TypeConverter.toPrestoType(com.facebook.presto.iceberg.TypeConverter.toPrestoType) CloseableIterable(org.apache.iceberg.io.CloseableIterable) NullableValue(com.facebook.presto.common.predicate.NullableValue) ImmutableMap(com.google.common.collect.ImmutableMap) Table(org.apache.iceberg.Table) Predicate(java.util.function.Predicate) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) Constraint(com.facebook.presto.spi.Constraint) TableScan(org.apache.iceberg.TableScan) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) Type(org.apache.iceberg.types.Type) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Estimate(com.facebook.presto.spi.statistics.Estimate) Function.identity(java.util.function.Function.identity) 
Optional(java.util.Optional) Comparator(java.util.Comparator) Types(org.apache.iceberg.types.Types) UncheckedIOException(java.io.UncheckedIOException) DataFile(org.apache.iceberg.DataFile) PartitionField(org.apache.iceberg.PartitionField) ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) TableScan(org.apache.iceberg.TableScan) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Constraint(com.facebook.presto.spi.Constraint) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) TypeConverter.toPrestoType(com.facebook.presto.iceberg.TypeConverter.toPrestoType) Type(org.apache.iceberg.types.Type) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) FileScanTask(org.apache.iceberg.FileScanTask)

Example 14 with Type

use of org.apache.iceberg.types.Type in project presto by prestodb.

the class TableStatisticsMaker method partitionTypes.

/**
 * Resolves the result type of every partition field's transform.
 *
 * @param partitionFields the partition fields, in the order the result types should be returned
 * @param idToTypeMapping lookup from source column id to that column's primitive type
 * @return an immutable list holding, for each field in order, the type its transform produces
 *         when applied to the mapped source type
 */
public List<Type> partitionTypes(List<PartitionField> partitionFields, Map<Integer, Type.PrimitiveType> idToTypeMapping) {
    ImmutableList.Builder<Type> resultTypes = ImmutableList.builder();
    partitionFields.forEach(field -> {
        Type.PrimitiveType inputType = idToTypeMapping.get(field.sourceId());
        resultTypes.add(field.transform().getResultType(inputType));
    });
    return resultTypes.build();
}
Also used : TypeConverter.toPrestoType(com.facebook.presto.iceberg.TypeConverter.toPrestoType) Type(org.apache.iceberg.types.Type) PartitionField(org.apache.iceberg.PartitionField) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList)

Example 15 with Type

use of org.apache.iceberg.types.Type in project presto by prestodb.

the class IcebergAbstractMetadata method finishInsert.

/**
 * Completes an insert by appending the data files described by {@code fragments} to the Iceberg
 * table and committing the enclosing transaction.
 *
 * <p>With no fragments the transaction is committed as-is and an empty result is returned.
 *
 * @param session the connector session (not consulted here)
 * @param insertHandle the insert handle; cast to {@code IcebergWritableTableHandle} for the file format
 * @param fragments JSON-encoded {@code CommitTaskData}, one per written data file
 * @param computedStatistics statistics gathered during the write (not consulted here)
 * @return the paths of the appended files wrapped in {@code HiveWrittenPartitions}, or empty when
 *         there were no fragments
 * @throws VerifyException if the table is partitioned and a task carries no partition data
 */
@Override
public Optional<ConnectorOutputMetadata> finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics) {
    // Nothing was written: just close out the transaction.
    if (fragments.isEmpty()) {
        transaction.commitTransaction();
        return Optional.empty();
    }

    IcebergWritableTableHandle writableHandle = (IcebergWritableTableHandle) insertHandle;
    org.apache.iceberg.Table icebergTable = transaction.table();

    // Decode every fragment into the commit description produced by a writer.
    List<CommitTaskData> writtenFiles = fragments.stream()
            .map(fragment -> commitTaskCodec.fromJson(fragment.getBytes()))
            .collect(toImmutableList());

    // Result type of each partition transform, in partition-spec order.
    Type[] partitionValueTypes = icebergTable.spec().fields().stream()
            .map(partitionField -> partitionField.transform().getResultType(icebergTable.schema().findType(partitionField.sourceId())))
            .toArray(Type[]::new);

    AppendFiles append = transaction.newFastAppend();
    for (CommitTaskData writtenFile : writtenFiles) {
        DataFiles.Builder dataFile = DataFiles.builder(icebergTable.spec());
        dataFile.withPath(writtenFile.getPath());
        dataFile.withFileSizeInBytes(writtenFile.getFileSizeInBytes());
        dataFile.withFormat(writableHandle.getFileFormat());
        dataFile.withMetrics(writtenFile.getMetrics().metrics());

        // Partitioned tables must carry partition data for every file.
        if (!icebergTable.spec().fields().isEmpty()) {
            String partitionJson = writtenFile.getPartitionDataJson()
                    .orElseThrow(() -> new VerifyException("No partition data for partitioned table"));
            dataFile.withPartition(PartitionData.fromJson(partitionJson, partitionValueTypes));
        }
        append.appendFile(dataFile.build());
    }

    append.commit();
    transaction.commitTransaction();

    return Optional.of(new HiveWrittenPartitions(
            writtenFiles.stream()
                    .map(CommitTaskData::getPath)
                    .collect(toImmutableList())));
}
Also used : JsonCodec(com.facebook.airlift.json.JsonCodec) TypeUtil(org.apache.iceberg.types.TypeUtil) Types(org.apache.iceberg.types.Types) ComputedStatistics(com.facebook.presto.spi.statistics.ComputedStatistics) AppendFiles(org.apache.iceberg.AppendFiles) Collections.singletonList(java.util.Collections.singletonList) SchemaTableName(com.facebook.presto.spi.SchemaTableName) DataFiles(org.apache.iceberg.DataFiles) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) PartitionFields.toPartitionFields(com.facebook.presto.iceberg.PartitionFields.toPartitionFields) FILE_FORMAT_PROPERTY(com.facebook.presto.iceberg.IcebergTableProperties.FILE_FORMAT_PROPERTY) SystemTable(com.facebook.presto.spi.SystemTable) TypeConverter.toPrestoType(com.facebook.presto.iceberg.TypeConverter.toPrestoType) ImmutableMap(com.google.common.collect.ImmutableMap) Collection(java.util.Collection) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) PARTITIONING_PROPERTY(com.facebook.presto.iceberg.IcebergTableProperties.PARTITIONING_PROPERTY) Set(java.util.Set) Schema(org.apache.iceberg.Schema) PartitionSpecParser(org.apache.iceberg.PartitionSpecParser) SchemaParser(org.apache.iceberg.SchemaParser) ConnectorSession(com.facebook.presto.spi.ConnectorSession) Type(org.apache.iceberg.types.Type) List(java.util.List) ColumnMetadata(com.facebook.presto.spi.ColumnMetadata) TypeConverter.toIcebergType(com.facebook.presto.iceberg.TypeConverter.toIcebergType) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) Optional(java.util.Optional) HiveWrittenPartitions(com.facebook.presto.hive.HiveWrittenPartitions) ConnectorMetadata(com.facebook.presto.spi.connector.ConnectorMetadata) Logger(com.facebook.airlift.log.Logger) Slice(io.airlift.slice.Slice) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) 
ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) ConnectorOutputTableHandle(com.facebook.presto.spi.ConnectorOutputTableHandle) PrestoException(com.facebook.presto.spi.PrestoException) ArrayList(java.util.ArrayList) IcebergUtil.getFileFormat(com.facebook.presto.iceberg.IcebergUtil.getFileFormat) IcebergUtil.resolveSnapshotIdByName(com.facebook.presto.iceberg.IcebergUtil.resolveSnapshotIdByName) ImmutableList(com.google.common.collect.ImmutableList) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) IcebergUtil.getColumns(com.facebook.presto.iceberg.IcebergUtil.getColumns) ConnectorInsertTableHandle(com.facebook.presto.spi.ConnectorInsertTableHandle) ConnectorTableMetadata(com.facebook.presto.spi.ConnectorTableMetadata) VerifyException(com.google.common.base.VerifyException) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) ConnectorOutputMetadata(com.facebook.presto.spi.connector.ConnectorOutputMetadata) ConnectorTableLayout(com.facebook.presto.spi.ConnectorTableLayout) Constraint(com.facebook.presto.spi.Constraint) ConnectorTableLayoutResult(com.facebook.presto.spi.ConnectorTableLayoutResult) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) SchemaTablePrefix(com.facebook.presto.spi.SchemaTablePrefix) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Transaction(org.apache.iceberg.Transaction) ConnectorNewTableLayout(com.facebook.presto.spi.ConnectorNewTableLayout) AppendFiles(org.apache.iceberg.AppendFiles) DataFiles(org.apache.iceberg.DataFiles) HiveWrittenPartitions(com.facebook.presto.hive.HiveWrittenPartitions) TypeConverter.toPrestoType(com.facebook.presto.iceberg.TypeConverter.toPrestoType) Type(org.apache.iceberg.types.Type) TypeConverter.toIcebergType(com.facebook.presto.iceberg.TypeConverter.toIcebergType) VerifyException(com.google.common.base.VerifyException)

Aggregations

Type (org.apache.iceberg.types.Type)20 Schema (org.apache.iceberg.Schema)7 TypeConverter.toPrestoType (com.facebook.presto.iceberg.TypeConverter.toPrestoType)6 PartitionField (org.apache.iceberg.PartitionField)6 Types (org.apache.iceberg.types.Types)6 Test (org.junit.Test)6 ImmutableList (com.google.common.collect.ImmutableList)5 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)5 List (java.util.List)5 Map (java.util.Map)5 Table (org.apache.iceberg.Table)5 ArrayType (com.facebook.presto.common.type.ArrayType)3 RowType (com.facebook.presto.common.type.RowType)3 TypeManager (com.facebook.presto.common.type.TypeManager)3 ColumnMetadata (com.facebook.presto.spi.ColumnMetadata)3 Constraint (com.facebook.presto.spi.Constraint)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 Objects.requireNonNull (java.util.Objects.requireNonNull)3