Examples with PartitionSpec - org.apache.iceberg.PartitionSpec

Example 1 with PartitionSpec

Use of org.apache.iceberg.PartitionSpec in the Apache Drill project.

From the class IcebergTableSchema, method of.

/**
 * Based on given class fields annotated with {@link MetastoreFieldDefinition}
 * generates Iceberg table schema and its partition specification.
 * <p>
 * Declared fields without the annotation are skipped. Each annotated field is mapped
 * to an optional Iceberg field named after its metastore column. The Iceberg type is
 * looked up by the lower-cased simple name of the field's Java type; parameterized
 * {@code List} and {@code Map} fields are converted to Iceberg list and map types
 * based on their generic arguments.
 * <p>
 * Partition spec fields follow the order of the given partition keys.
 *
 * @param clazz base class for Iceberg schema
 * @param partitionKeys list of partition keys
 * @return instance of Iceberg table schema
 * @throws IcebergMetastoreException if a field type cannot be mapped to an Iceberg type
 *         or if some partition key has no matching annotated field in the class
 */
public static IcebergTableSchema of(Class<?> clazz, List<MetastoreColumn> partitionKeys) {
    List<Types.NestedField> tableSchemaFields = new ArrayList<>();
    // Slots are filled by partition key position, so a null slot means a missing key.
    Types.NestedField[] partitionSpecSchemaFields = new Types.NestedField[partitionKeys.size()];
    int schemaIndex = STARTING_SCHEMA_INDEX;
    int complexTypesIndex = STARTING_COMPLEX_TYPES_INDEX;
    for (Field field : clazz.getDeclaredFields()) {
        MetastoreFieldDefinition definition = field.getAnnotation(MetastoreFieldDefinition.class);
        if (definition == null) {
            continue;
        }
        MetastoreColumn column = definition.column();
        // Lower-case with a fixed locale: the default-locale variant would turn "Integer"
        // into "ınteger" (dotless i) under e.g. a Turkish locale and break the lookups below.
        String typeSimpleName = field.getType().getSimpleName().toLowerCase(java.util.Locale.ROOT);
        org.apache.iceberg.types.Type icebergType = JAVA_TO_ICEBERG_TYPE_MAP.get(typeSimpleName);
        if (icebergType == null && field.getAnnotatedType().getType() instanceof ParameterizedType) {
            Type[] actualTypeArguments = ((ParameterizedType) field.getAnnotatedType().getType()).getActualTypeArguments();
            switch(typeSimpleName) {
                case "list":
                    org.apache.iceberg.types.Type listIcebergType = getGenericsType(actualTypeArguments[0]);
                    icebergType = Types.ListType.ofOptional(complexTypesIndex++, listIcebergType);
                    break;
                case "map":
                    org.apache.iceberg.types.Type keyIcebergType = getGenericsType(actualTypeArguments[0]);
                    org.apache.iceberg.types.Type valueIcebergType = getGenericsType(actualTypeArguments[1]);
                    // Two consecutive ids are consumed: first for the key, second for the value.
                    icebergType = Types.MapType.ofOptional(complexTypesIndex++, complexTypesIndex++, keyIcebergType, valueIcebergType);
                    break;
                default:
                    throw new IcebergMetastoreException(String.format("Unexpected parametrized type for class [%s]: %s", clazz.getCanonicalName(), typeSimpleName));
            }
        }
        if (icebergType == null) {
            throw new IcebergMetastoreException(String.format("Unexpected type for class [%s]: %s", clazz.getCanonicalName(), typeSimpleName));
        }
        Types.NestedField icebergField = Types.NestedField.optional(schemaIndex++, column.columnName(), icebergType);
        tableSchemaFields.add(icebergField);
        int partitionIndex = partitionKeys.indexOf(column);
        if (partitionIndex != -1) {
            partitionSpecSchemaFields[partitionIndex] = icebergField;
        }
    }
    if (Stream.of(partitionSpecSchemaFields).anyMatch(Objects::isNull)) {
        throw new IcebergMetastoreException(String.format("Some of partition fields are missing in the class [%s]. Partition keys: %s. Partition values: %s.", clazz.getCanonicalName(), partitionKeys, Arrays.asList(partitionSpecSchemaFields)));
    }
    Schema tableSchema = new Schema(tableSchemaFields);
    PartitionSpec partitionSpec = buildPartitionSpec(partitionSpecSchemaFields);
    logger.debug("Constructed Iceberg table schema for class [{}]. Table schema : {}. Partition spec: {}.", clazz.getCanonicalName(), tableSchema, partitionSpec);
    return new IcebergTableSchema(tableSchema, partitionSpec);
}

Also used : IcebergMetastoreException(org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException) Types(org.apache.iceberg.types.Types) Schema(org.apache.iceberg.Schema) ArrayList(java.util.ArrayList) MetastoreFieldDefinition(org.apache.drill.metastore.MetastoreFieldDefinition) PartitionSpec(org.apache.iceberg.PartitionSpec) MetastoreColumn(org.apache.drill.metastore.MetastoreColumn) ParameterizedType(java.lang.reflect.ParameterizedType) Field(java.lang.reflect.Field) ParameterizedType(java.lang.reflect.ParameterizedType) Type(java.lang.reflect.Type) Objects(java.util.Objects)

Example 2 with PartitionSpec

Use of org.apache.iceberg.PartitionSpec in the Apache Drill project.

From the class TestIcebergTableSchema, method testPartitionedPartitionSpec.

/**
 * Generates a class with three partition key fields and two regular fields,
 * then checks that all five appear in the table schema and that the partition
 * spec is an identity partitioning over the three keys in the requested order.
 */
@Test
public void testPartitionedPartitionSpec() {
    String generatedClassName = getClass().getSimpleName() + "PartitionedPartitionSpec";
    Class<?> generatedClass = new ClassGenerator(generatedClassName) {

        @Override
        void addFields(ClassWriter writer) {
            // Partition key columns.
            FieldVisitor storagePlugin = addField(writer, Opcodes.ACC_PRIVATE, MetastoreColumn.STORAGE_PLUGIN, String.class);
            annotate(storagePlugin, MetastoreColumn.STORAGE_PLUGIN, MetadataType.ALL);
            FieldVisitor workspace = addField(writer, Opcodes.ACC_PRIVATE, MetastoreColumn.WORKSPACE, String.class);
            annotate(workspace, MetastoreColumn.WORKSPACE, MetadataType.ALL);
            FieldVisitor tableName = addField(writer, Opcodes.ACC_PRIVATE, MetastoreColumn.TABLE_NAME, String.class);
            annotate(tableName, MetastoreColumn.TABLE_NAME, MetadataType.ALL);

            // Non-partition columns.
            FieldVisitor rowGroupIndex = addField(writer, Opcodes.ACC_PRIVATE, MetastoreColumn.ROW_GROUP_INDEX, Integer.class);
            annotate(rowGroupIndex, MetastoreColumn.ROW_GROUP_INDEX, MetadataType.ROW_GROUP);
            FieldVisitor owner = addField(writer, Opcodes.ACC_PRIVATE, MetastoreColumn.OWNER, Boolean.class);
            annotate(owner, MetastoreColumn.OWNER, MetadataType.TABLE);
        }
    }.generate();

    IcebergTableSchema icebergSchema = IcebergTableSchema.of(
        generatedClass,
        Arrays.asList(MetastoreColumn.STORAGE_PLUGIN, MetastoreColumn.WORKSPACE, MetastoreColumn.TABLE_NAME));
    Schema actualTableSchema = icebergSchema.tableSchema();

    Types.NestedField storagePluginField = actualTableSchema.findField(MetastoreColumn.STORAGE_PLUGIN.columnName());
    assertNotNull(storagePluginField);
    Types.NestedField workspaceField = actualTableSchema.findField(MetastoreColumn.WORKSPACE.columnName());
    assertNotNull(workspaceField);
    Types.NestedField tableNameField = actualTableSchema.findField(MetastoreColumn.TABLE_NAME.columnName());
    assertNotNull(tableNameField);
    assertNotNull(actualTableSchema.findField(MetastoreColumn.ROW_GROUP_INDEX.columnName()));
    assertNotNull(actualTableSchema.findField(MetastoreColumn.OWNER.columnName()));

    Schema keysOnlySchema = new Schema(storagePluginField, workspaceField, tableNameField);
    PartitionSpec expected = PartitionSpec.builderFor(keysOnlySchema)
        .identity(storagePluginField.name())
        .identity(workspaceField.name())
        .identity(tableNameField.name())
        .build();
    assertEquals(expected, icebergSchema.partitionSpec());
}

Also used : Types(org.apache.iceberg.types.Types) Schema(org.apache.iceberg.Schema) FieldVisitor(org.objectweb.asm.FieldVisitor) PartitionSpec(org.apache.iceberg.PartitionSpec) ClassWriter(org.objectweb.asm.ClassWriter) IcebergBaseTest(org.apache.drill.metastore.iceberg.IcebergBaseTest) Test(org.junit.Test)

Example 3 with PartitionSpec

Use of org.apache.iceberg.PartitionSpec in the Presto project (prestodb).

From the class IcebergPageSinkProvider, method createPageSink.

/**
 * Builds an {@link IcebergPageSink} for the given writable table handle.
 * The Iceberg schema and partition spec are parsed from the JSON carried by the handle.
 *
 * @param session current connector session
 * @param tableHandle handle describing the table being written
 * @return page sink writing into the table described by the handle
 */
private ConnectorPageSink createPageSink(ConnectorSession session, IcebergWritableTableHandle tableHandle) {
    HdfsContext context = new HdfsContext(session, tableHandle.getSchemaName(), tableHandle.getTableName());

    // The partition spec JSON is resolved against the parsed schema.
    Schema icebergSchema = SchemaParser.fromJson(tableHandle.getSchemaAsJson());
    PartitionSpec spec = PartitionSpecParser.fromJson(icebergSchema, tableHandle.getPartitionSpecAsJson());

    SchemaTableName qualifiedName = new SchemaTableName(tableHandle.getSchemaName(), tableHandle.getTableName());
    LocationProvider locations = getLocationProvider(
        qualifiedName,
        tableHandle.getOutputPath(),
        tableHandle.getStorageProperties());

    return new IcebergPageSink(
        icebergSchema,
        spec,
        locations,
        fileWriterFactory,
        pageIndexerFactory,
        hdfsEnvironment,
        context,
        tableHandle.getInputColumns(),
        jsonCodec,
        session,
        tableHandle.getFileFormat());
}

Also used : IcebergUtil.getLocationProvider(com.facebook.presto.iceberg.IcebergUtil.getLocationProvider) LocationProvider(org.apache.iceberg.io.LocationProvider) Schema(org.apache.iceberg.Schema) HdfsContext(com.facebook.presto.hive.HdfsContext) PartitionSpec(org.apache.iceberg.PartitionSpec) SchemaTableName(com.facebook.presto.spi.SchemaTableName)

Example 4 with PartitionSpec

Use of org.apache.iceberg.PartitionSpec in the Presto project (prestodb).

From the class IcebergHadoopMetadata, method beginCreateTable.

/**
 * Starts an Iceberg create-table transaction for the given table metadata and
 * returns a writable handle describing the table being created.
 * The transaction is stored in the {@code transaction} field.
 *
 * @param session current connector session
 * @param tableMetadata columns, properties and optional comment of the new table
 * @param layout requested table layout (not read by this method)
 * @return handle carrying the schema and partition spec as JSON, columns, location,
 *         file format and table properties
 * @throws TableAlreadyExistsException if the catalog reports that the table already exists
 */
@Override
public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorNewTableLayout> layout) {
    SchemaTableName qualifiedName = tableMetadata.getTable();
    String schemaName = qualifiedName.getSchemaName();
    String tableName = qualifiedName.getTableName();

    Schema icebergSchema = toIcebergSchema(tableMetadata.getColumns());
    PartitionSpec spec = parsePartitionFields(icebergSchema, getPartitioning(tableMetadata.getProperties()));

    // Collect the table properties passed to the catalog.
    ImmutableMap.Builder<String, String> tableProperties = ImmutableMap.builder();
    FileFormat format = getFileFormat(tableMetadata.getProperties());
    tableProperties.put(DEFAULT_FILE_FORMAT, format.toString());
    tableMetadata.getComment().ifPresent(comment -> tableProperties.put(TABLE_COMMENT, comment));
    String version = getFormatVersion(tableMetadata.getProperties());
    if (version != null) {
        tableProperties.put(FORMAT_VERSION, version);
    }

    try {
        transaction = resourceFactory.getCatalog(session).newCreateTableTransaction(
            toIcebergTableIdentifier(qualifiedName),
            icebergSchema,
            spec,
            tableProperties.build());
    } catch (AlreadyExistsException e) {
        throw new TableAlreadyExistsException(qualifiedName);
    }

    // Build the handle from the table as seen by the transaction.
    Table created = transaction.table();
    return new IcebergWritableTableHandle(
        schemaName,
        tableName,
        SchemaParser.toJson(created.schema()),
        PartitionSpecParser.toJson(created.spec()),
        getColumns(created.schema(), typeManager),
        created.location(),
        format,
        created.properties());
}

Also used : TableAlreadyExistsException(com.facebook.presto.hive.TableAlreadyExistsException) SystemTable(com.facebook.presto.spi.SystemTable) IcebergUtil.getHadoopIcebergTable(com.facebook.presto.iceberg.IcebergUtil.getHadoopIcebergTable) Table(org.apache.iceberg.Table) AlreadyExistsException(org.apache.iceberg.exceptions.AlreadyExistsException) TableAlreadyExistsException(com.facebook.presto.hive.TableAlreadyExistsException) Schema(org.apache.iceberg.Schema) FileFormat(org.apache.iceberg.FileFormat) IcebergTableProperties.getFileFormat(com.facebook.presto.iceberg.IcebergTableProperties.getFileFormat) SchemaTableName(com.facebook.presto.spi.SchemaTableName) PartitionSpec(org.apache.iceberg.PartitionSpec) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap)

Example 5 with PartitionSpec

Use of org.apache.iceberg.PartitionSpec in the Presto project (prestodb).

From the class ManifestsTable, method buildPages.

/**
 * Produces the pages of the manifests table: one row per manifest returned by
 * {@code allManifests()} of the requested snapshot.
 *
 * @param tableMetadata metadata used to create the page list builder
 * @param icebergTable table whose snapshot and partition specs are read
 * @param snapshotId id of the snapshot to list manifests for
 * @return pages built from the snapshot's manifests
 * @throws PrestoException with {@code ICEBERG_INVALID_METADATA} if the snapshot does not exist
 */
private static List<Page> buildPages(ConnectorTableMetadata tableMetadata, Table icebergTable, long snapshotId) {
    PageListBuilder rows = PageListBuilder.forTable(tableMetadata);

    Snapshot requested = icebergTable.snapshot(snapshotId);
    if (requested == null) {
        throw new PrestoException(
            ICEBERG_INVALID_METADATA,
            format("Snapshot ID [%s] does not exist for table: %s", snapshotId, icebergTable));
    }

    // Each manifest records the id of its spec; the spec is looked up here for the summaries.
    Map<Integer, PartitionSpec> specsById = icebergTable.specs();
    snapshot:
    requested.allManifests().forEach(manifest -> {
        rows.beginRow();
        rows.appendVarchar(manifest.path());
        rows.appendBigint(manifest.length());
        rows.appendInteger(manifest.partitionSpecId());
        rows.appendBigint(manifest.snapshotId());
        rows.appendInteger(manifest.addedFilesCount());
        rows.appendInteger(manifest.existingFilesCount());
        rows.appendInteger(manifest.deletedFilesCount());
        writePartitionSummaries(
            rows.nextColumn(),
            manifest.partitions(),
            specsById.get(manifest.partitionSpecId()));
        rows.endRow();
    });

    return rows.build();
}

Also used : PageListBuilder(com.facebook.presto.iceberg.util.PageListBuilder) Snapshot(org.apache.iceberg.Snapshot) PrestoException(com.facebook.presto.spi.PrestoException) PartitionSpec(org.apache.iceberg.PartitionSpec)

Aggregations

PartitionSpec (org.apache.iceberg.PartitionSpec): 59; Test (org.junit.Test): 39; Table (org.apache.iceberg.Table): 38; Schema (org.apache.iceberg.Schema): 37; Record (org.apache.iceberg.data.Record): 19; TableIdentifier (org.apache.iceberg.catalog.TableIdentifier): 18; List (java.util.List): 10; FileFormat (org.apache.iceberg.FileFormat): 9; ArrayList (java.util.ArrayList): 8; FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 8; ImmutableList (org.apache.iceberg.relocated.com.google.common.collect.ImmutableList): 7; IOException (java.io.IOException): 6; UpdateSchema (org.apache.iceberg.UpdateSchema): 6; BaseTable (org.apache.iceberg.BaseTable): 5; Path (org.apache.hadoop.fs.Path): 4; PartitionField (org.apache.iceberg.PartitionField): 4; Types (org.apache.iceberg.types.Types): 4; HdfsContext (com.facebook.presto.hive.HdfsContext): 3; PrestoException (com.facebook.presto.spi.PrestoException): 3; Map (java.util.Map): 3