Use of org.apache.iceberg.PartitionSpec in the Apache Drill project.
Class IcebergTableSchema, method of.
/**
 * Based on given class fields annotated with {@link MetastoreFieldDefinition}
 * generates Iceberg table schema and its partition specification.
 *
 * <p>Fields without the annotation are skipped. Every annotated field becomes an optional
 * Iceberg column named after its {@link MetastoreColumn}; column ids are assigned sequentially
 * starting from {@code STARTING_SCHEMA_INDEX}, while element / key / value ids of list and map
 * columns are drawn from a separate counter starting at {@code STARTING_COMPLEX_TYPES_INDEX}.
 * Partition fields are emitted in the order given by {@code partitionKeys}.
 *
 * @param clazz base class for Iceberg schema
 * @param partitionKeys list of partition keys
 * @return instance of Iceberg table schema
 * @throws IcebergMetastoreException if an annotated field has a type that cannot be mapped
 *         to an Iceberg type, or if any of the partition keys has no matching annotated field
 */
public static IcebergTableSchema of(Class<?> clazz, List<MetastoreColumn> partitionKeys) {
  List<Types.NestedField> tableSchemaFields = new ArrayList<>();
  Types.NestedField[] partitionSpecSchemaFields = new Types.NestedField[partitionKeys.size()];

  int schemaIndex = STARTING_SCHEMA_INDEX;
  int complexTypesIndex = STARTING_COMPLEX_TYPES_INDEX;

  for (Field field : clazz.getDeclaredFields()) {
    MetastoreFieldDefinition definition = field.getAnnotation(MetastoreFieldDefinition.class);
    if (definition == null) {
      continue;
    }

    MetastoreColumn column = definition.column();

    // Lower-case with Locale.ROOT so the lookup key does not depend on the JVM default locale
    // (e.g. under a Turkish locale "Integer" would otherwise become "\u0131nteger").
    String typeSimpleName = field.getType().getSimpleName().toLowerCase(java.util.Locale.ROOT);
    org.apache.iceberg.types.Type icebergType = JAVA_TO_ICEBERG_TYPE_MAP.get(typeSimpleName);

    // Not a directly mapped type: try to resolve it as a parameterized list or map.
    if (icebergType == null && field.getAnnotatedType().getType() instanceof ParameterizedType) {
      Type[] actualTypeArguments = ((ParameterizedType) field.getAnnotatedType().getType()).getActualTypeArguments();
      switch (typeSimpleName) {
        case "list":
          org.apache.iceberg.types.Type listIcebergType = getGenericsType(actualTypeArguments[0]);
          icebergType = Types.ListType.ofOptional(complexTypesIndex++, listIcebergType);
          break;
        case "map":
          org.apache.iceberg.types.Type keyIcebergType = getGenericsType(actualTypeArguments[0]);
          org.apache.iceberg.types.Type valueIcebergType = getGenericsType(actualTypeArguments[1]);
          // A map consumes two consecutive ids: the first for the key, the second for the value.
          int keyId = complexTypesIndex++;
          int valueId = complexTypesIndex++;
          icebergType = Types.MapType.ofOptional(keyId, valueId, keyIcebergType, valueIcebergType);
          break;
        default:
          throw new IcebergMetastoreException(String.format("Unexpected parametrized type for class [%s]: %s", clazz.getCanonicalName(), typeSimpleName));
      }
    }

    if (icebergType == null) {
      throw new IcebergMetastoreException(String.format("Unexpected type for class [%s]: %s", clazz.getCanonicalName(), typeSimpleName));
    }

    Types.NestedField icebergField = Types.NestedField.optional(schemaIndex++, column.columnName(), icebergType);
    tableSchemaFields.add(icebergField);

    // Place the field at the position of its partition key so the spec keeps the requested order.
    int partitionIndex = partitionKeys.indexOf(column);
    if (partitionIndex != -1) {
      partitionSpecSchemaFields[partitionIndex] = icebergField;
    }
  }

  // Any slot still null means a partition key had no matching annotated field in the class.
  if (Stream.of(partitionSpecSchemaFields).anyMatch(Objects::isNull)) {
    throw new IcebergMetastoreException(String.format("Some of partition fields are missing in the class [%s]. Partition keys: %s. Partition values: %s.", clazz.getCanonicalName(), partitionKeys, Arrays.asList(partitionSpecSchemaFields)));
  }

  Schema tableSchema = new Schema(tableSchemaFields);
  PartitionSpec partitionSpec = buildPartitionSpec(partitionSpecSchemaFields);
  logger.debug("Constructed Iceberg table schema for class [{}]. Table schema : {}. Partition spec: {}.", clazz.getCanonicalName(), tableSchema, partitionSpec);
  return new IcebergTableSchema(tableSchema, partitionSpec);
}
Use of org.apache.iceberg.PartitionSpec in the Apache Drill project.
Class TestIcebergTableSchema, method testPartitionedPartitionSpec.
/**
 * Generates a class with three partition-key fields plus two regular fields and checks that
 * the resulting Iceberg schema contains all five columns and that the partition spec is an
 * identity partitioning over the three partition keys, in the requested order.
 */
@Test
public void testPartitionedPartitionSpec() {
  String generatedClassName = getClass().getSimpleName() + "PartitionedPartitionSpec";
  Class<?> generatedClass = new ClassGenerator(generatedClassName) {

    @Override
    void addFields(ClassWriter classWriter) {
      // partition key fields
      annotate(
          addField(classWriter, Opcodes.ACC_PRIVATE, MetastoreColumn.STORAGE_PLUGIN, String.class),
          MetastoreColumn.STORAGE_PLUGIN, MetadataType.ALL);
      annotate(
          addField(classWriter, Opcodes.ACC_PRIVATE, MetastoreColumn.WORKSPACE, String.class),
          MetastoreColumn.WORKSPACE, MetadataType.ALL);
      annotate(
          addField(classWriter, Opcodes.ACC_PRIVATE, MetastoreColumn.TABLE_NAME, String.class),
          MetastoreColumn.TABLE_NAME, MetadataType.ALL);

      // non-partition fields
      annotate(
          addField(classWriter, Opcodes.ACC_PRIVATE, MetastoreColumn.ROW_GROUP_INDEX, Integer.class),
          MetastoreColumn.ROW_GROUP_INDEX, MetadataType.ROW_GROUP);
      annotate(
          addField(classWriter, Opcodes.ACC_PRIVATE, MetastoreColumn.OWNER, Boolean.class),
          MetastoreColumn.OWNER, MetadataType.TABLE);
    }
  }.generate();

  IcebergTableSchema icebergSchema = IcebergTableSchema.of(
      generatedClass,
      Arrays.asList(MetastoreColumn.STORAGE_PLUGIN, MetastoreColumn.WORKSPACE, MetastoreColumn.TABLE_NAME));
  Schema actualTableSchema = icebergSchema.tableSchema();

  // every generated field must be present in the table schema
  Types.NestedField storagePluginField = actualTableSchema.findField(MetastoreColumn.STORAGE_PLUGIN.columnName());
  assertNotNull(storagePluginField);
  Types.NestedField workspaceField = actualTableSchema.findField(MetastoreColumn.WORKSPACE.columnName());
  assertNotNull(workspaceField);
  Types.NestedField tableNameField = actualTableSchema.findField(MetastoreColumn.TABLE_NAME.columnName());
  assertNotNull(tableNameField);
  assertNotNull(actualTableSchema.findField(MetastoreColumn.ROW_GROUP_INDEX.columnName()));
  assertNotNull(actualTableSchema.findField(MetastoreColumn.OWNER.columnName()));

  // expected spec: identity transform on each partition key, in declaration order
  PartitionSpec.Builder expectedSpecBuilder =
      PartitionSpec.builderFor(new Schema(storagePluginField, workspaceField, tableNameField));
  expectedSpecBuilder.identity(storagePluginField.name());
  expectedSpecBuilder.identity(workspaceField.name());
  expectedSpecBuilder.identity(tableNameField.name());

  assertEquals(expectedSpecBuilder.build(), icebergSchema.partitionSpec());
}
Use of org.apache.iceberg.PartitionSpec in the PrestoDB Presto project.
Class IcebergPageSinkProvider, method createPageSink.
/**
 * Creates a page sink for the table described by the given writable handle.
 *
 * <p>The Iceberg schema and partition spec are restored from the JSON carried by the handle,
 * and a location provider is resolved from the handle's output path and storage properties.
 *
 * @param session connector session the sink is created for
 * @param tableHandle writable table handle carrying schema, partitioning and output details
 * @return page sink writing to the handle's table
 */
private ConnectorPageSink createPageSink(ConnectorSession session, IcebergWritableTableHandle tableHandle) {
  String schemaName = tableHandle.getSchemaName();
  String tableName = tableHandle.getTableName();
  HdfsContext hdfsContext = new HdfsContext(session, schemaName, tableName);

  // the partition spec JSON is parsed against the schema restored from the handle
  Schema schema = SchemaParser.fromJson(tableHandle.getSchemaAsJson());
  PartitionSpec partitionSpec = PartitionSpecParser.fromJson(schema, tableHandle.getPartitionSpecAsJson());

  SchemaTableName schemaTableName = new SchemaTableName(schemaName, tableName);
  LocationProvider locationProvider = getLocationProvider(
      schemaTableName,
      tableHandle.getOutputPath(),
      tableHandle.getStorageProperties());

  return new IcebergPageSink(
      schema,
      partitionSpec,
      locationProvider,
      fileWriterFactory,
      pageIndexerFactory,
      hdfsEnvironment,
      hdfsContext,
      tableHandle.getInputColumns(),
      jsonCodec,
      session,
      tableHandle.getFileFormat());
}
Use of org.apache.iceberg.PartitionSpec in the PrestoDB Presto project.
Class IcebergHadoopMetadata, method beginCreateTable.
/**
 * Starts a create-table transaction in the Iceberg catalog and returns a writable handle
 * for the new table.
 *
 * <p>The file format is always recorded as a table property; the table comment and format
 * version are recorded only when present. The opened transaction is stored in the
 * {@code transaction} field.
 *
 * @param session connector session used to obtain the catalog
 * @param tableMetadata metadata (name, columns, properties, comment) of the table to create
 * @param layout requested table layout (not used by this method)
 * @return writable handle describing the table being created
 * @throws TableAlreadyExistsException if the catalog reports that the table already exists
 */
@Override
public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorNewTableLayout> layout) {
  SchemaTableName schemaTableName = tableMetadata.getTable();

  Schema schema = toIcebergSchema(tableMetadata.getColumns());
  PartitionSpec partitionSpec = parsePartitionFields(schema, getPartitioning(tableMetadata.getProperties()));

  // collect Iceberg table properties
  ImmutableMap.Builder<String, String> tableProperties = ImmutableMap.builder();
  FileFormat fileFormat = getFileFormat(tableMetadata.getProperties());
  tableProperties.put(DEFAULT_FILE_FORMAT, fileFormat.toString());
  tableMetadata.getComment().ifPresent(comment -> tableProperties.put(TABLE_COMMENT, comment));
  String formatVersion = getFormatVersion(tableMetadata.getProperties());
  if (formatVersion != null) {
    tableProperties.put(FORMAT_VERSION, formatVersion);
  }

  try {
    transaction = resourceFactory.getCatalog(session).newCreateTableTransaction(
        toIcebergTableIdentifier(schemaTableName),
        schema,
        partitionSpec,
        tableProperties.build());
  } catch (AlreadyExistsException e) {
    throw new TableAlreadyExistsException(schemaTableName);
  }

  // describe the table as seen through the freshly opened transaction
  Table icebergTable = transaction.table();
  String schemaJson = SchemaParser.toJson(icebergTable.schema());
  String partitionSpecJson = PartitionSpecParser.toJson(icebergTable.spec());
  return new IcebergWritableTableHandle(
      schemaTableName.getSchemaName(),
      schemaTableName.getTableName(),
      schemaJson,
      partitionSpecJson,
      getColumns(icebergTable.schema(), typeManager),
      icebergTable.location(),
      fileFormat,
      icebergTable.properties());
}
Use of org.apache.iceberg.PartitionSpec in the PrestoDB Presto project.
Class ManifestsTable, method buildPages.
/**
 * Builds the pages of the manifests table: one row per manifest of the given snapshot.
 *
 * <p>Each row holds the manifest path, length, partition spec id, snapshot id, the added /
 * existing / deleted file counts, and the partition summaries written against the partition
 * spec the manifest refers to.
 *
 * @param tableMetadata metadata describing the columns of the produced pages
 * @param icebergTable Iceberg table whose snapshot is inspected
 * @param snapshotId id of the snapshot to list manifests for
 * @return pages with one row per manifest
 * @throws PrestoException with {@code ICEBERG_INVALID_METADATA} if the snapshot does not exist
 */
private static List<Page> buildPages(ConnectorTableMetadata tableMetadata, Table icebergTable, long snapshotId) {
  PageListBuilder rows = PageListBuilder.forTable(tableMetadata);

  Snapshot targetSnapshot = icebergTable.snapshot(snapshotId);
  if (targetSnapshot == null) {
    throw new PrestoException(
        ICEBERG_INVALID_METADATA,
        format("Snapshot ID [%s] does not exist for table: %s", snapshotId, icebergTable));
  }

  Map<Integer, PartitionSpec> specsById = icebergTable.specs();
  targetSnapshot.allManifests().forEach(manifest -> {
    // spec the manifest was written with; needed to render its partition summaries
    PartitionSpec manifestSpec = specsById.get(manifest.partitionSpecId());

    rows.beginRow();
    rows.appendVarchar(manifest.path());
    rows.appendBigint(manifest.length());
    rows.appendInteger(manifest.partitionSpecId());
    rows.appendBigint(manifest.snapshotId());
    rows.appendInteger(manifest.addedFilesCount());
    rows.appendInteger(manifest.existingFilesCount());
    rows.appendInteger(manifest.deletedFilesCount());
    writePartitionSummaries(rows.nextColumn(), manifest.partitions(), manifestSpec);
    rows.endRow();
  });

  return rows.build();
}
Aggregations