Search in sources :

Example 1 with IcebergMetastoreException

use of org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException in project drill by apache.

the class IcebergTableSchema method of.

/**
 * Based on given class fields annotated with {@link MetastoreFieldDefinition}
 * generates Iceberg table schema and its partition specification.
 * Fields without the annotation are skipped; every generated Iceberg field is optional.
 *
 * @param clazz base class for Iceberg schema
 * @param partitionKeys list of partition keys
 * @return instance of Iceberg table schema
 * @throws IcebergMetastoreException if an annotated field type cannot be mapped
 *         to an Iceberg type or if some partition key has no matching annotated field
 */
public static IcebergTableSchema of(Class<?> clazz, List<MetastoreColumn> partitionKeys) {
    List<Types.NestedField> tableSchemaFields = new ArrayList<>();
    // slots are filled by partition key position, unfilled slots stay null and are reported below
    Types.NestedField[] partitionSpecSchemaFields = new Types.NestedField[partitionKeys.size()];
    int schemaIndex = STARTING_SCHEMA_INDEX;
    int complexTypesIndex = STARTING_COMPLEX_TYPES_INDEX;
    for (Field field : clazz.getDeclaredFields()) {
        MetastoreFieldDefinition definition = field.getAnnotation(MetastoreFieldDefinition.class);
        if (definition == null) {
            continue;
        }
        MetastoreColumn column = definition.column();
        // Locale.ROOT keeps the lookup key stable regardless of the JVM default locale
        // (e.g. under a Turkish locale "Integer" would otherwise lower-case to "ınteger")
        String typeSimpleName = field.getType().getSimpleName().toLowerCase(java.util.Locale.ROOT);
        org.apache.iceberg.types.Type icebergType = JAVA_TO_ICEBERG_TYPE_MAP.get(typeSimpleName);
        // type is not in the simple types map: try to resolve it as a parametrized list or map
        if (icebergType == null && field.getAnnotatedType().getType() instanceof ParameterizedType) {
            Type[] actualTypeArguments = ((ParameterizedType) field.getAnnotatedType().getType()).getActualTypeArguments();
            switch(typeSimpleName) {
                case "list":
                    org.apache.iceberg.types.Type listIcebergType = getGenericsType(actualTypeArguments[0]);
                    // list consumes one complex type index (for its element)
                    icebergType = Types.ListType.ofOptional(complexTypesIndex++, listIcebergType);
                    break;
                case "map":
                    org.apache.iceberg.types.Type keyIcebergType = getGenericsType(actualTypeArguments[0]);
                    org.apache.iceberg.types.Type valueIcebergType = getGenericsType(actualTypeArguments[1]);
                    // map consumes two consecutive complex type indexes (key first, then value)
                    icebergType = Types.MapType.ofOptional(complexTypesIndex++, complexTypesIndex++, keyIcebergType, valueIcebergType);
                    break;
                default:
                    throw new IcebergMetastoreException(String.format("Unexpected parametrized type for class [%s]: %s", clazz.getCanonicalName(), typeSimpleName));
            }
        }
        if (icebergType == null) {
            throw new IcebergMetastoreException(String.format("Unexpected type for class [%s]: %s", clazz.getCanonicalName(), typeSimpleName));
        }
        Types.NestedField icebergField = Types.NestedField.optional(schemaIndex++, column.columnName(), icebergType);
        tableSchemaFields.add(icebergField);
        int partitionIndex = partitionKeys.indexOf(column);
        if (partitionIndex != -1) {
            partitionSpecSchemaFields[partitionIndex] = icebergField;
        }
    }
    // every partition key must have been matched by some annotated field
    if (Stream.of(partitionSpecSchemaFields).anyMatch(Objects::isNull)) {
        throw new IcebergMetastoreException(String.format("Some of partition fields are missing in the class [%s]. Partition keys: %s. Partition values: %s.", clazz.getCanonicalName(), partitionKeys, Arrays.asList(partitionSpecSchemaFields)));
    }
    Schema tableSchema = new Schema(tableSchemaFields);
    PartitionSpec partitionSpec = buildPartitionSpec(partitionSpecSchemaFields);
    logger.debug("Constructed Iceberg table schema for class [{}]. Table schema : {}. Partition spec: {}.", clazz.getCanonicalName(), tableSchema, partitionSpec);
    return new IcebergTableSchema(tableSchema, partitionSpec);
}
Also used : IcebergMetastoreException(org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException) Types(org.apache.iceberg.types.Types) Schema(org.apache.iceberg.Schema) ArrayList(java.util.ArrayList) MetastoreFieldDefinition(org.apache.drill.metastore.MetastoreFieldDefinition) PartitionSpec(org.apache.iceberg.PartitionSpec) MetastoreColumn(org.apache.drill.metastore.MetastoreColumn) ParameterizedType(java.lang.reflect.ParameterizedType) Field(java.lang.reflect.Field) ParameterizedType(java.lang.reflect.ParameterizedType) Type(java.lang.reflect.Type) Objects(java.util.Objects)

Example 2 with IcebergMetastoreException

use of org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException in project drill by apache.

the class IcebergMetastore method baseLocation.

/**
 * Constructs Iceberg tables base location based on given base and relative paths.
 * If {@link IcebergConfigConstants#BASE_PATH} is not set, user home directory is used.
 * {@link IcebergConfigConstants#RELATIVE_PATH} must be set.
 *
 * @param configuration Hadoop configuration
 * @return Iceberg table base location
 * @throws IcebergMetastoreException if unable to init file system
 *         or Iceberg Metastore relative path is not indicated
 */
private String baseLocation(Configuration configuration) {
    FileSystem fs;
    try {
        fs = FileSystem.get(configuration);
    } catch (IOException e) {
        // keep the original I/O failure as the cause so the root problem is not lost
        throw new IcebergMetastoreException(String.format("Error during file system [%s] setup", configuration.get(FileSystem.FS_DEFAULT_NAME_KEY)), e);
    }
    // default root is the user home directory, overridden when base path is configured
    String root = fs.getHomeDirectory().toUri().getPath();
    if (config.hasPath(IcebergConfigConstants.BASE_PATH)) {
        root = config.getString(IcebergConfigConstants.BASE_PATH);
    }
    // NOTE(review): if config is a Typesafe Config instance, getString throws on a missing path
    // instead of returning null, which would make the check below unreachable - confirm
    String relativeLocation = config.getString(IcebergConfigConstants.RELATIVE_PATH);
    if (relativeLocation == null) {
        throw new IcebergMetastoreException(String.format("Iceberg Metastore relative path [%s] is not provided", IcebergConfigConstants.RELATIVE_PATH));
    }
    String location = new Path(root, relativeLocation).toUri().getPath();
    logger.info("Iceberg Metastore is located in [{}] on file system [{}]", location, fs.getUri());
    return location;
}
Also used : IcebergMetastoreException(org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException) Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) IOException(java.io.IOException)

Example 3 with IcebergMetastoreException

use of org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException in project drill by apache.

the class InputDataTransformer method getPartition.

/**
 * Builds a record holding partition values: for each column of the given partition schema
 * the matching getter is invoked on the given Metastore component unit
 * and its result is stored in the record under the column name.
 *
 * @param unit specific Metastore component unit
 * @param schema partition schema
 * @param unitGetters specific Metastore component unit getters
 * @return {@link Record} with partition values
 * @throws IcebergMetastoreException if getter to partition column is absent,
 *         getter invocation fails or partition column value is null
 */
private Record getPartition(T unit, Schema schema, Map<String, MethodHandle> unitGetters) {
    Record result = GenericRecord.create(schema);
    for (Types.NestedField partitionColumn : schema.columns()) {
        String columnName = partitionColumn.name();
        MethodHandle getter = unitGetters.get(columnName);

        // unlike regular columns, partition columns must always have a getter
        if (getter == null) {
            String message = String.format("Getter for partition key [%s::%s] must be declared in [%s] class", columnName, partitionColumn.type(), unit.getClass().getSimpleName());
            throw new IcebergMetastoreException(message);
        }

        Object partitionValue;
        try {
            partitionValue = getter.invoke(unit);
        } catch (Throwable e) {
            String message = String.format("Unable to invoke getter for column [%s] using [%s]", columnName, getter);
            throw new IcebergMetastoreException(message, e);
        }

        // partition values are mandatory
        if (partitionValue == null) {
            String message = String.format("Partition key [%s::%s] value must be set", columnName, partitionColumn.type());
            throw new IcebergMetastoreException(message);
        }

        result.setField(columnName, partitionValue);
    }
    return result;
}
Also used : IcebergMetastoreException(org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException) Types(org.apache.iceberg.types.Types) Record(org.apache.iceberg.data.Record) GenericRecord(org.apache.iceberg.data.GenericRecord) MethodHandle(java.lang.invoke.MethodHandle)

Example 4 with IcebergMetastoreException

use of org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException in project drill by apache.

the class InputDataTransformer method execute.

/**
 * Converts units into Iceberg records based on the table schema and
 * determines the partition record shared by all of them.
 * Table columns without a corresponding getter are left unset in the record.
 *
 * @return write data holding the records and their partition record,
 *         partition is null when there are no units
 * @throws IcebergMetastoreException if getter invocation fails
 *         or units produce more than one distinct partition record
 */
public WriteData execute() {
    List<Record> tableRecords = new ArrayList<>();
    // set is used to detect units that belong to different partitions
    Set<Record> distinctPartitions = new HashSet<>();

    for (T unit : units) {
        distinctPartitions.add(getPartition(unit, partitionSpecSchema, unitGetters));

        Record tableRecord = GenericRecord.create(tableSchema);
        for (Types.NestedField tableColumn : tableSchema.columns()) {
            String columnName = tableColumn.name();
            MethodHandle getter = unitGetters.get(columnName);
            // absent getters are ignored
            if (getter != null) {
                try {
                    tableRecord.setField(columnName, getter.invoke(unit));
                } catch (Throwable e) {
                    String message = String.format("Unable to invoke getter for column [%s] using [%s]", columnName, getter);
                    throw new IcebergMetastoreException(message, e);
                }
            }
        }
        tableRecords.add(tableRecord);
    }

    if (distinctPartitions.size() > 1) {
        String message = String.format("Partition keys values must be the same for all records in the partition. " + "Partition schema: [%s]. Received partition values: %s", partitionSpecSchema, distinctPartitions);
        throw new IcebergMetastoreException(message);
    }

    if (distinctPartitions.isEmpty()) {
        return new WriteData(tableRecords, null);
    }
    return new WriteData(tableRecords, distinctPartitions.iterator().next());
}
Also used : IcebergMetastoreException(org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException) Types(org.apache.iceberg.types.Types) ArrayList(java.util.ArrayList) Record(org.apache.iceberg.data.Record) GenericRecord(org.apache.iceberg.data.GenericRecord) HashSet(java.util.HashSet) MethodHandle(java.lang.invoke.MethodHandle)

Example 5 with IcebergMetastoreException

use of org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException in project drill by apache.

the class ParquetFileWriter method write.

/**
 * Writes records into a parquet file created in the given location under the given name
 * (parquet extension is added to the name).
 *
 * @return written file holder with output file and its metrics
 * @throws NullPointerException if location or name was not specified
 * @throws IcebergMetastoreException if unable to write data into the file
 */
@Override
public File write() {
    Objects.requireNonNull(location, "File create location must be specified");
    Objects.requireNonNull(name, "File name must be specified");
    OutputFile target = table.io().newOutputFile(
        new Path(location, FileFormat.PARQUET.addExtension(name)).toUri().getPath());
    FileAppender<Record> appender = null;
    try {
        appender = Parquet.write(target)
            .forTable(table)
            .createWriterFunc(GenericParquetWriter::buildWriter)
            .build();
        appender.addAll(records);
        // metrics are available only when file was written (i.e. close method was executed)
        appender.close();
        return new File(target, appender.metrics());
    } catch (IOException | ClassCastException | RuntimeIOException e) {
        closeIgnoringFailure(appender);
        String message = String.format("Unable to write data into parquet file [%s]", target.location());
        throw new IcebergMetastoreException(message, e);
    }
}

/**
 * Closes given appender if it was created, ignoring any exception raised during closing.
 * Used on the failure path only: write has failed anyway, so the initial exception is the one to report.
 *
 * @param appender file appender, may be null
 */
private static void closeIgnoringFailure(FileAppender<Record> appender) {
    if (appender == null) {
        return;
    }
    try {
        appender.close();
    } catch (Exception ignored) {
        // write has failed anyway, ignore closing exception if any and throw initial one
    }
}
Also used : OutputFile(org.apache.iceberg.io.OutputFile) Path(org.apache.hadoop.fs.Path) IcebergMetastoreException(org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException) RuntimeIOException(org.apache.iceberg.exceptions.RuntimeIOException) GenericParquetWriter(org.apache.iceberg.data.parquet.GenericParquetWriter) Record(org.apache.iceberg.data.Record) RuntimeIOException(org.apache.iceberg.exceptions.RuntimeIOException) IOException(java.io.IOException) OutputFile(org.apache.iceberg.io.OutputFile) RuntimeIOException(org.apache.iceberg.exceptions.RuntimeIOException) IOException(java.io.IOException) IcebergMetastoreException(org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException)

Aggregations

IcebergMetastoreException (org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException): 6, MethodHandle (java.lang.invoke.MethodHandle): 3, ArrayList (java.util.ArrayList): 3, Record (org.apache.iceberg.data.Record): 3, Types (org.apache.iceberg.types.Types): 3, IOException (java.io.IOException): 2, Path (org.apache.hadoop.fs.Path): 2, GenericRecord (org.apache.iceberg.data.GenericRecord): 2, Field (java.lang.reflect.Field): 1, ParameterizedType (java.lang.reflect.ParameterizedType): 1, Type (java.lang.reflect.Type): 1, HashSet (java.util.HashSet): 1, Map (java.util.Map): 1, Objects (java.util.Objects): 1, MetastoreColumn (org.apache.drill.metastore.MetastoreColumn): 1, MetastoreFieldDefinition (org.apache.drill.metastore.MetastoreFieldDefinition): 1, TableMetadataUnit (org.apache.drill.metastore.components.tables.TableMetadataUnit): 1, FileSystem (org.apache.hadoop.fs.FileSystem): 1, PartitionSpec (org.apache.iceberg.PartitionSpec): 1, Schema (org.apache.iceberg.Schema): 1