Use of org.apache.drill.metastore.MetastoreColumn in project drill by apache.
The class IcebergTableSchema, method of.
/**
 * Based on the given class fields annotated with {@link MetastoreFieldDefinition},
 * generates an Iceberg table schema and its partition specification.
*
* @param clazz base class for Iceberg schema
* @param partitionKeys list of partition keys
* @return instance of Iceberg table schema
*/
public static IcebergTableSchema of(Class<?> clazz, List<MetastoreColumn> partitionKeys) {
  List<Types.NestedField> tableSchemaFields = new ArrayList<>();
  Types.NestedField[] partitionSpecSchemaFields = new Types.NestedField[partitionKeys.size()];
  int schemaIndex = STARTING_SCHEMA_INDEX;
  int complexTypesIndex = STARTING_COMPLEX_TYPES_INDEX;
  for (Field field : clazz.getDeclaredFields()) {
    MetastoreFieldDefinition definition = field.getAnnotation(MetastoreFieldDefinition.class);
    if (definition == null) {
      continue;
    }
    MetastoreColumn column = definition.column();
    String typeSimpleName = field.getType().getSimpleName().toLowerCase();
    org.apache.iceberg.types.Type icebergType = JAVA_TO_ICEBERG_TYPE_MAP.get(typeSimpleName);
    if (icebergType == null && field.getAnnotatedType().getType() instanceof ParameterizedType) {
      Type[] actualTypeArguments = ((ParameterizedType) field.getAnnotatedType().getType()).getActualTypeArguments();
      switch (typeSimpleName) {
        case "list":
          org.apache.iceberg.types.Type listIcebergType = getGenericsType(actualTypeArguments[0]);
          icebergType = Types.ListType.ofOptional(complexTypesIndex++, listIcebergType);
          break;
        case "map":
          org.apache.iceberg.types.Type keyIcebergType = getGenericsType(actualTypeArguments[0]);
          org.apache.iceberg.types.Type valueIcebergType = getGenericsType(actualTypeArguments[1]);
          icebergType = Types.MapType.ofOptional(complexTypesIndex++, complexTypesIndex++, keyIcebergType, valueIcebergType);
          break;
        default:
          throw new IcebergMetastoreException(String.format(
            "Unexpected parameterized type for class [%s]: %s", clazz.getCanonicalName(), typeSimpleName));
      }
    }
    if (icebergType == null) {
      throw new IcebergMetastoreException(String.format(
        "Unexpected type for class [%s]: %s", clazz.getCanonicalName(), typeSimpleName));
    }
    Types.NestedField icebergField = Types.NestedField.optional(schemaIndex++, column.columnName(), icebergType);
    tableSchemaFields.add(icebergField);
    int partitionIndex = partitionKeys.indexOf(column);
    if (partitionIndex != -1) {
      partitionSpecSchemaFields[partitionIndex] = icebergField;
    }
  }
  if (Stream.of(partitionSpecSchemaFields).anyMatch(Objects::isNull)) {
    throw new IcebergMetastoreException(String.format(
      "Some of the partition fields are missing in the class [%s]. Partition keys: %s. Partition values: %s.",
      clazz.getCanonicalName(), partitionKeys, Arrays.asList(partitionSpecSchemaFields)));
  }
  Schema tableSchema = new Schema(tableSchemaFields);
  PartitionSpec partitionSpec = buildPartitionSpec(partitionSpecSchemaFields);
  logger.debug("Constructed Iceberg table schema for class [{}]. Table schema: {}. Partition spec: {}.",
    clazz.getCanonicalName(), tableSchema, partitionSpec);
  return new IcebergTableSchema(tableSchema, partitionSpec);
}
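To illustrate the contract, here is a minimal sketch of calling this factory method. The ExampleUnit class below is hypothetical, and the sketch assumes column is the only annotation element that must be supplied:

// Hypothetical metadata class; the fields and column mapping are illustrative only.
public class ExampleUnit {

  @MetastoreFieldDefinition(column = MetastoreColumn.STORAGE_PLUGIN)
  private String storagePlugin;

  @MetastoreFieldDefinition(column = MetastoreColumn.WORKSPACE)
  private String workspace;
}

// Generate the Iceberg schema, partitioned by the storage plugin column.
IcebergTableSchema schema = IcebergTableSchema.of(ExampleUnit.class,
  Collections.singletonList(MetastoreColumn.STORAGE_PLUGIN));

Both annotated fields are plain strings, so they resolve through JAVA_TO_ICEBERG_TYPE_MAP rather than the list and map branches.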
Use of org.apache.drill.metastore.MetastoreColumn in project drill by apache.
The class TablesOperationTransformer, method toOverwrite.
/**
 * Groups the given list of {@link TableMetadataUnit} by table key
 * (storage plugin, workspace and table name), then groups each table's units
 * by metadata key. Each group is converted into an overwrite operation.
*
* @param units Metastore component units
* @return list of overwrite operations
*/
public List<Overwrite> toOverwrite(List<TableMetadataUnit> units) {
  Map<TableKey, Map<String, List<TableMetadataUnit>>> data = units.stream()
    .collect(Collectors.groupingBy(TableKey::of,
      Collectors.groupingBy(TableMetadataUnit::metadataKey)));
  return data.entrySet().parallelStream()
    .map(dataEntry -> dataEntry.getValue().entrySet().parallelStream()
      .map(operationEntry -> {
        TableKey tableKey = dataEntry.getKey();
        String location = tableKey.toLocation(context.table().location());
        Map<MetastoreColumn, Object> filterConditions = new HashMap<>(tableKey.toFilterConditions());
        filterConditions.put(MetastoreColumn.METADATA_KEY, operationEntry.getKey());
        Expression expression = context.transformer().filter().transform(filterConditions);
        return toOverwrite(location, expression, operationEntry.getValue());
      })
      .collect(Collectors.toList()))
    .flatMap(Collection::stream)
    .collect(Collectors.toList());
}
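For clarity, here is the two-level grouping from the first statement in isolation, with hypothetical sample units:

List<TableMetadataUnit> units = Arrays.asList(
  TableMetadataUnit.builder().storagePlugin("dfs").workspace("tmp")
    .tableName("nation").metadataKey("dir0").build(),
  TableMetadataUnit.builder().storagePlugin("dfs").workspace("tmp")
    .tableName("nation").metadataKey("dir1").build());

// Outer key: table (storage plugin, workspace, table name).
// Inner key: metadata key.
Map<TableKey, Map<String, List<TableMetadataUnit>>> grouped = units.stream()
  .collect(Collectors.groupingBy(TableKey::of,
    Collectors.groupingBy(TableMetadataUnit::metadataKey)));

// Both units share one table key, so grouped has a single outer entry with two
// inner entries ("dir0" and "dir1"), which become two overwrite operations.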
Use of org.apache.drill.metastore.MetastoreColumn in project drill by apache.
The class TestTablesOperationTransformer, method testToOverwriteOperation.
@Test
public void testToOverwriteOperation() {
  TableMetadataUnit unit = TableMetadataUnit.builder()
    .storagePlugin("dfs")
    .workspace("tmp")
    .tableName("nation")
    .metadataKey("dir0")
    .build();
  TableKey tableKey = new TableKey(unit.storagePlugin(), unit.workspace(), unit.tableName());
  Map<MetastoreColumn, Object> filterConditions = new HashMap<>(tableKey.toFilterConditions());
  filterConditions.put(MetastoreColumn.METADATA_KEY, unit.metadataKey());
  String location = tableKey.toLocation(TestTablesOperationTransformer.location);
  Expression expression = new FilterTransformer().transform(filterConditions);
  Overwrite operation = transformer.toOverwrite(location, expression, Collections.singletonList(unit));
  assertEquals(expression.toString(), operation.filter().toString());
  Path path = new Path(String.valueOf(operation.dataFile().path()));
  File file = new File(path.toUri().getPath());
  assertTrue(file.exists());
  assertEquals(location, path.getParent().toUri().getPath());
}
Use of org.apache.drill.metastore.MetastoreColumn in project drill by apache.
The class TestFilterTransformer, method testToFilterConditionsTwo.
@Test
public void testToFilterConditionsTwo() {
  Map<MetastoreColumn, Object> conditions = new LinkedHashMap<>();
  conditions.put(MetastoreColumn.STORAGE_PLUGIN, "dfs");
  conditions.put(MetastoreColumn.WORKSPACE, "tmp");
  Expression expected = Expressions.and(
    Expressions.equal(MetastoreColumn.STORAGE_PLUGIN.columnName(), "dfs"),
    Expressions.equal(MetastoreColumn.WORKSPACE.columnName(), "tmp"));
  assertEquals(expected.toString(), transformer.transform(conditions).toString());
}
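Extending the same pattern with a third condition is a straightforward sketch; based on the two-condition case above, the transformer should fold the entries into a conjunction, though the exact nesting of the resulting AND expression is an assumption here:

Map<MetastoreColumn, Object> conditions = new LinkedHashMap<>();
conditions.put(MetastoreColumn.STORAGE_PLUGIN, "dfs");
conditions.put(MetastoreColumn.WORKSPACE, "tmp");
// The third condition is illustrative only.
conditions.put(MetastoreColumn.TABLE_NAME, "nation");
Expression filter = transformer.transform(conditions);

Using a LinkedHashMap keeps the condition order deterministic, which is why the test above can compare the expressions by their string form.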
Use of org.apache.drill.metastore.MetastoreColumn in project drill by apache.
The class MongoRead, method internalExecute.
@Override
protected List<T> internalExecute() {
  FilterTransformer filterTransformer = context.transformer().filter();
  Bson rowFilter = filterTransformer.combine(
    filterTransformer.transform(metadataTypes),
    filterTransformer.transform(filter));
  List<Document> documents = Lists.newLinkedList();
  context.table().find(rowFilter).forEach(documents::add);
  return context.transformer().outputData()
    .columns(columns.stream().map(MetastoreColumn::columnName).collect(Collectors.toList()))
    .documents(documents)
    .execute();
}
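For context, the row filter is a plain Bson value from the Mongo Java driver. A stand-alone sketch of an equivalent read, assuming a MongoCollection<Document> named collection and hypothetical condition values:

// Hypothetical equivalent of the combined filter, built directly with
// com.mongodb.client.model.Filters.
Bson rowFilter = Filters.and(
  Filters.eq(MetastoreColumn.STORAGE_PLUGIN.columnName(), "dfs"),
  Filters.eq(MetastoreColumn.WORKSPACE.columnName(), "tmp"));

List<Document> documents = new ArrayList<>();
collection.find(rowFilter).forEach(documents::add);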