Usage of org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException in the Apache Drill project.
Example: the IcebergTableSchema.of method.
/**
 * Based on given class fields annotated with {@link MetastoreFieldDefinition}
 * generates Iceberg table schema and its partition specification.
 *
 * @param clazz base class for Iceberg schema
 * @param partitionKeys list of partition keys
 * @return instance of Iceberg table schema
 */
public static IcebergTableSchema of(Class<?> clazz, List<MetastoreColumn> partitionKeys) {
  List<Types.NestedField> schemaFields = new ArrayList<>();
  Types.NestedField[] partitionFields = new Types.NestedField[partitionKeys.size()];
  int nextFieldId = STARTING_SCHEMA_INDEX;
  int nextComplexTypeId = STARTING_COMPLEX_TYPES_INDEX;
  for (Field field : clazz.getDeclaredFields()) {
    MetastoreFieldDefinition definition = field.getAnnotation(MetastoreFieldDefinition.class);
    if (definition == null) {
      // only fields annotated with MetastoreFieldDefinition participate in the schema
      continue;
    }
    MetastoreColumn column = definition.column();
    String typeSimpleName = field.getType().getSimpleName().toLowerCase();
    org.apache.iceberg.types.Type icebergType = JAVA_TO_ICEBERG_TYPE_MAP.get(typeSimpleName);
    if (icebergType == null && field.getAnnotatedType().getType() instanceof ParameterizedType) {
      // not a simple mapped type: resolve generic collections (list / map) element types
      Type[] typeArguments = ((ParameterizedType) field.getAnnotatedType().getType()).getActualTypeArguments();
      if ("list".equals(typeSimpleName)) {
        icebergType = Types.ListType.ofOptional(nextComplexTypeId++, getGenericsType(typeArguments[0]));
      } else if ("map".equals(typeSimpleName)) {
        // key id is assigned before value id, matching the sequential id scheme
        int keyId = nextComplexTypeId++;
        int valueId = nextComplexTypeId++;
        icebergType = Types.MapType.ofOptional(keyId, valueId,
          getGenericsType(typeArguments[0]), getGenericsType(typeArguments[1]));
      } else {
        throw new IcebergMetastoreException(String.format(
          "Unexpected parametrized type for class [%s]: %s", clazz.getCanonicalName(), typeSimpleName));
      }
    }
    if (icebergType == null) {
      throw new IcebergMetastoreException(String.format(
        "Unexpected type for class [%s]: %s", clazz.getCanonicalName(), typeSimpleName));
    }
    Types.NestedField schemaField = Types.NestedField.optional(nextFieldId++, column.columnName(), icebergType);
    schemaFields.add(schemaField);
    // partition fields are stored at the same position as their key in partitionKeys
    int partitionIndex = partitionKeys.indexOf(column);
    if (partitionIndex != -1) {
      partitionFields[partitionIndex] = schemaField;
    }
  }
  if (Stream.of(partitionFields).anyMatch(Objects::isNull)) {
    throw new IcebergMetastoreException(String.format(
      "Some of partition fields are missing in the class [%s]. Partition keys: %s. Partition values: %s.",
      clazz.getCanonicalName(), partitionKeys, Arrays.asList(partitionFields)));
  }
  Schema tableSchema = new Schema(schemaFields);
  PartitionSpec partitionSpec = buildPartitionSpec(partitionFields);
  logger.debug("Constructed Iceberg table schema for class [{}]. Table schema : {}. Partition spec: {}.",
    clazz.getCanonicalName(), tableSchema, partitionSpec);
  return new IcebergTableSchema(tableSchema, partitionSpec);
}
Usage of org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException in the Apache Drill project.
Example: the IcebergMetastore.baseLocation method.
/**
 * Constructs Iceberg tables base location based on given base and relative paths.
 * If {@link IcebergConfigConstants#BASE_PATH} is not set, user home directory is used.
 * {@link IcebergConfigConstants#RELATIVE_PATH} must be set.
 *
 * @param configuration Hadoop configuration
 * @return Iceberg table base location
 * @throws IcebergMetastoreException if unable to init file system
 * or Iceberg Metastore relative path is not indicated
 */
private String baseLocation(Configuration configuration) {
  FileSystem fs;
  try {
    fs = FileSystem.get(configuration);
  } catch (IOException e) {
    // chain the original IOException as the cause instead of dropping it,
    // so the root failure reason is preserved in the stack trace
    throw new IcebergMetastoreException(String.format("Error during file system [%s] setup",
      configuration.get(FileSystem.FS_DEFAULT_NAME_KEY)), e);
  }
  // default root is the user home directory, overridden by BASE_PATH when present
  String root = fs.getHomeDirectory().toUri().getPath();
  if (config.hasPath(IcebergConfigConstants.BASE_PATH)) {
    root = config.getString(IcebergConfigConstants.BASE_PATH);
  }
  String relativeLocation = config.getString(IcebergConfigConstants.RELATIVE_PATH);
  if (relativeLocation == null) {
    throw new IcebergMetastoreException(String.format(
      "Iceberg Metastore relative path [%s] is not provided", IcebergConfigConstants.RELATIVE_PATH));
  }
  String location = new Path(root, relativeLocation).toUri().getPath();
  logger.info("Iceberg Metastore is located in [{}] on file system [{}]", location, fs.getUri());
  return location;
}
Usage of org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException in the Apache Drill project.
Example: the InputDataTransformer.getPartition method.
/**
 * Generates record with partition values based on given partition schema
 * and Metastore component unit instance.
 *
 * @param unit specific Metastore component unit
 * @param schema partition schema
 * @param unitGetters specific Metastore component unit getters
 * @return {@link Record} with partition values
 * @throws IcebergMetastoreException if getter to partition column is absent or
 * partition column value is null
 */
private Record getPartition(T unit, Schema schema, Map<String, MethodHandle> unitGetters) {
  Record record = GenericRecord.create(schema);
  for (Types.NestedField field : schema.columns()) {
    String fieldName = field.name();
    MethodHandle getter = unitGetters.get(fieldName);
    // every partition column must be backed by a getter on the unit class
    if (getter == null) {
      throw new IcebergMetastoreException(String.format(
        "Getter for partition key [%s::%s] must be declared in [%s] class",
        fieldName, field.type(), unit.getClass().getSimpleName()));
    }
    Object fieldValue;
    try {
      fieldValue = getter.invoke(unit);
    } catch (Throwable e) {
      throw new IcebergMetastoreException(String.format(
        "Unable to invoke getter for column [%s] using [%s]", fieldName, getter), e);
    }
    // partition values may not be null: they identify the partition itself
    if (fieldValue == null) {
      throw new IcebergMetastoreException(String.format(
        "Partition key [%s::%s] value must be set", fieldName, field.type()));
    }
    record.setField(fieldName, fieldValue);
  }
  return record;
}
Usage of org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException in the Apache Drill project.
Example: the InputDataTransformer.execute method.
/**
 * Converts the configured Metastore component units into Iceberg table records
 * and computes their common partition record.
 *
 * @return write data with table records and the shared partition record
 *         (null partition when no units were given)
 * @throws IcebergMetastoreException if units resolve to more than one distinct partition
 *         or a getter invocation fails
 */
public WriteData execute() {
  List<Record> tableRecords = new ArrayList<>();
  Set<Record> partitionRecords = new HashSet<>();
  for (T unit : units) {
    partitionRecords.add(getPartition(unit, partitionSpecSchema, unitGetters));
    Record tableRecord = GenericRecord.create(tableSchema);
    for (Types.NestedField field : tableSchema.columns()) {
      String fieldName = field.name();
      MethodHandle getter = unitGetters.get(fieldName);
      // columns without a getter on the unit class are simply left unset
      if (getter != null) {
        try {
          tableRecord.setField(fieldName, getter.invoke(unit));
        } catch (Throwable e) {
          throw new IcebergMetastoreException(String.format(
            "Unable to invoke getter for column [%s] using [%s]", fieldName, getter), e);
        }
      }
    }
    tableRecords.add(tableRecord);
  }
  // all units written together must belong to exactly one partition
  if (partitionRecords.size() > 1) {
    throw new IcebergMetastoreException(String.format("Partition keys values must be the same for all records in the partition. " + "Partition schema: [%s]. Received partition values: %s", partitionSpecSchema, partitionRecords));
  }
  return new WriteData(tableRecords, partitionRecords.isEmpty() ? null : partitionRecords.iterator().next());
}
Usage of org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException in the Apache Drill project.
Example: the ParquetFileWriter.write method.
/**
 * Writes the accumulated records into a new Parquet file at the configured
 * location and name, returning the written file together with its metrics.
 *
 * @return written file holder with collected write metrics
 * @throws IcebergMetastoreException if writing the Parquet file fails
 */
@Override
public File write() {
  Objects.requireNonNull(location, "File create location must be specified");
  Objects.requireNonNull(name, "File name must be specified");
  OutputFile outputFile = table.io().newOutputFile(
    new Path(location, FileFormat.PARQUET.addExtension(name)).toUri().getPath());
  FileAppender<Record> fileAppender = null;
  try {
    fileAppender = Parquet.write(outputFile)
      .forTable(table)
      .createWriterFunc(GenericParquetWriter::buildWriter)
      .build();
    fileAppender.addAll(records);
    fileAppender.close();
    // metrics are available only when file was written (i.e. close method was executed)
    return new File(outputFile, fileAppender.metrics());
  } catch (IOException | ClassCastException | RuntimeIOException e) {
    if (fileAppender != null) {
      try {
        fileAppender.close();
      } catch (Exception ex) {
        // write has failed anyway; keep the closing failure as a suppressed
        // exception instead of discarding it, then rethrow the initial one
        e.addSuppressed(ex);
      }
    }
    throw new IcebergMetastoreException(
      String.format("Unable to write data into parquet file [%s]", outputFile.location()), e);
  }
}
Aggregations