Use of org.apache.iceberg.data.Record in project Drill by Apache.
The class InputDataTransformer, method getPartition.
/**
 * Generates a record with partition values based on the given partition schema
 * and Metastore component unit instance.
 *
 * @param unit specific Metastore component unit
 * @param schema partition schema
 * @param unitGetters specific Metastore component unit getters
 * @return {@link Record} with partition values
 * @throws IcebergMetastoreException if a getter for a partition column is absent
 *         or a partition column value is null
 */
private Record getPartition(T unit, Schema schema, Map<String, MethodHandle> unitGetters) {
  Record partitionRecord = GenericRecord.create(schema);
  for (Types.NestedField column : schema.columns()) {
    String name = column.name();
    MethodHandle methodHandle = unitGetters.get(name);
    if (methodHandle == null) {
      throw new IcebergMetastoreException(String.format(
        "Getter for partition key [%s::%s] must be declared in [%s] class",
        name, column.type(), unit.getClass().getSimpleName()));
    }
    Object value;
    try {
      value = methodHandle.invoke(unit);
    } catch (Throwable e) {
      throw new IcebergMetastoreException(String.format(
        "Unable to invoke getter for column [%s] using [%s]", name, methodHandle), e);
    }
    if (value == null) {
      throw new IcebergMetastoreException(String.format(
        "Partition key [%s::%s] value must be set", name, column.type()));
    }
    partitionRecord.setField(name, value);
  }
  return partitionRecord;
}
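
The unitGetters map is expected to hold one MethodHandle per column, keyed by column name. A minimal sketch of how such a map could be built, assuming the unit class exposes public no-arg getters named exactly after the schema columns (this helper is illustrative, not the Drill source):

import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.reflect.Method;
import java.util.HashMap;
import java.util.Map;

// Illustrative helper, not part of Drill: maps each public no-arg method of the
// unit class to a MethodHandle keyed by method name, so getPartition can look
// up a column getter by column name.
static Map<String, MethodHandle> buildUnitGetters(Class<?> unitClass) throws IllegalAccessException {
  Map<String, MethodHandle> getters = new HashMap<>();
  MethodHandles.Lookup lookup = MethodHandles.lookup();
  for (Method method : unitClass.getDeclaredMethods()) {
    if (method.getParameterCount() == 0 && method.getReturnType() != void.class) {
      getters.put(method.getName(), lookup.unreflect(method));
    }
  }
  return getters;
}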
Use of org.apache.iceberg.data.Record in project Drill by Apache.
The class InputDataTransformer, method execute.
public WriteData execute() {
  List<Record> records = new ArrayList<>();
  Set<Record> partitions = new HashSet<>();
  for (T unit : units) {
    partitions.add(getPartition(unit, partitionSpecSchema, unitGetters));
    Record record = GenericRecord.create(tableSchema);
    for (Types.NestedField column : tableSchema.columns()) {
      String name = column.name();
      MethodHandle methodHandle = unitGetters.get(name);
      if (methodHandle == null) {
        // ignore absent getters
        continue;
      }
      try {
        record.setField(name, methodHandle.invoke(unit));
      } catch (Throwable e) {
        throw new IcebergMetastoreException(String.format(
          "Unable to invoke getter for column [%s] using [%s]", name, methodHandle), e);
      }
    }
    records.add(record);
  }
  if (partitions.size() > 1) {
    throw new IcebergMetastoreException(String.format(
      "Partition keys values must be the same for all records in the partition. "
        + "Partition schema: [%s]. Received partition values: %s",
      partitionSpecSchema, partitions));
  }
  return new WriteData(records, partitions.isEmpty() ? null : partitions.iterator().next());
}
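
Collecting partition records into a HashSet deduplicates them via Record equality, so more than one element means the batch mixes partition keys. The WriteData result pairs the table records with the single shared partition record; a minimal sketch of such a holder, matching the records() and partition() accessors used in the tests below (the actual Drill class may differ in details):

// Minimal sketch of the records-plus-partition value holder; accessor names
// follow the usage in this section, but the real Drill implementation may differ.
public class WriteData {
  private final List<Record> records;
  private final Record partition;

  public WriteData(List<Record> records, Record partition) {
    this.records = records;
    this.partition = partition;
  }

  public List<Record> records() {
    return records;
  }

  public Record partition() {
    return partition;
  }
}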
Use of org.apache.iceberg.data.Record in project Drill by Apache.
The class TestTablesInputDataTransformer, method testValidDataSeveralRecords.
@Test
public void testValidDataSeveralRecords() {
  List<TableMetadataUnit> units = Arrays.asList(
    TableMetadataUnit.builder().storagePlugin("dfs").workspace("tmp").tableName("nation")
      .metadataKey(MetadataInfo.GENERAL_INFO_KEY).column("a").build(),
    TableMetadataUnit.builder().storagePlugin("dfs").workspace("tmp").tableName("nation")
      .metadataKey(MetadataInfo.GENERAL_INFO_KEY).column("b").build(),
    TableMetadataUnit.builder().storagePlugin("dfs").workspace("tmp").tableName("nation")
      .metadataKey(MetadataInfo.GENERAL_INFO_KEY).column("c").build());

  WriteData writeData = new InputDataTransformer<TableMetadataUnit>(metastoreSchema, partitionSchema, unitGetters)
    .units(units)
    .execute();

  Record tableRecord1 = GenericRecord.create(metastoreSchema);
  tableRecord1.setField("storagePlugin", "dfs");
  tableRecord1.setField("workspace", "tmp");
  tableRecord1.setField("tableName", "nation");
  tableRecord1.setField("metadataKey", MetadataInfo.GENERAL_INFO_KEY);
  tableRecord1.setField("column", "a");

  Record tableRecord2 = GenericRecord.create(metastoreSchema);
  tableRecord2.setField("storagePlugin", "dfs");
  tableRecord2.setField("workspace", "tmp");
  tableRecord2.setField("tableName", "nation");
  tableRecord2.setField("metadataKey", MetadataInfo.GENERAL_INFO_KEY);
  tableRecord2.setField("column", "b");

  Record tableRecord3 = GenericRecord.create(metastoreSchema);
  tableRecord3.setField("storagePlugin", "dfs");
  tableRecord3.setField("workspace", "tmp");
  tableRecord3.setField("tableName", "nation");
  tableRecord3.setField("metadataKey", MetadataInfo.GENERAL_INFO_KEY);
  tableRecord3.setField("column", "c");

  Record partitionRecord = GenericRecord.create(partitionSchema);
  partitionRecord.setField("storagePlugin", "dfs");
  partitionRecord.setField("workspace", "tmp");
  partitionRecord.setField("tableName", "nation");
  partitionRecord.setField("metadataKey", MetadataInfo.GENERAL_INFO_KEY);

  assertEquals(Arrays.asList(tableRecord1, tableRecord2, tableRecord3), writeData.records());
  assertEquals(partitionRecord, writeData.partition());
}
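
All three units share the same partition key values, so the partitions set collapses to a single record. For contrast, mixing partition keys should make execute() throw; a hypothetical negative case (not part of the Drill test suite shown here, written with JUnit 4.13's assertThrows) might look like:

// Hypothetical negative case: two different table names produce two distinct
// partition records, so execute() is expected to throw.
List<TableMetadataUnit> mixedUnits = Arrays.asList(
  TableMetadataUnit.builder().storagePlugin("dfs").workspace("tmp").tableName("nation")
    .metadataKey(MetadataInfo.GENERAL_INFO_KEY).column("a").build(),
  TableMetadataUnit.builder().storagePlugin("dfs").workspace("tmp").tableName("region")
    .metadataKey(MetadataInfo.GENERAL_INFO_KEY).column("b").build());

assertThrows(IcebergMetastoreException.class,
  () -> new InputDataTransformer<TableMetadataUnit>(metastoreSchema, partitionSchema, unitGetters)
    .units(mixedUnits)
    .execute());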
Use of org.apache.iceberg.data.Record in project Drill by Apache.
The class TestTablesOutputDataTransformer, method testInvalidColumns.
@Test
public void testInvalidColumns() {
  Record record = GenericRecord.create(schema);
  record.setField("tableName", "a");

  List<TableMetadataUnit> actualResult = new TablesOutputDataTransformer(unitSetters)
    .records(Collections.singletonList(record))
    .columns(Arrays.asList("a", "b"))
    .execute();

  List<TableMetadataUnit> expectedResult = Collections.singletonList(TableMetadataUnit.builder().build());
  assertEquals(expectedResult, actualResult);
}
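
Since neither requested column "a" nor "b" has a matching setter, the transformer yields an empty unit rather than failing. A hypothetical positive counterpart (assumed here, not copied from the Drill suite) would request the existing tableName column and expect its value to be carried into the unit:

// Hypothetical positive counterpart: requesting the existing "tableName"
// column copies the record value into the built unit.
List<TableMetadataUnit> result = new TablesOutputDataTransformer(unitSetters)
  .records(Collections.singletonList(record))
  .columns(Collections.singletonList("tableName"))
  .execute();

assertEquals(
  Collections.singletonList(TableMetadataUnit.builder().tableName("a").build()),
  result);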
Use of org.apache.iceberg.data.Record in project Drill by Apache.
The class TestParquetFileWriter, method testTypeMismatch.
@Test
public void testTypeMismatch() throws Exception {
  Schema schema = new Schema(Types.NestedField.optional(1, "int_field", Types.IntegerType.get()));

  Record record = GenericRecord.create(schema);
  record.setField("int_field", 1);
  record.setField("int_field", "abc");

  String location = defaultFolder.newFolder("testTypeMismatch").toURI().getPath();
  Table table = tables.create(schema, location);

  thrown.expect(IcebergMetastoreException.class);

  new ParquetFileWriter(table)
    .records(Collections.singletonList(record))
    .location(location)
    .name("typeMismatch")
    .write();
}
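
GenericRecord.setField performs no type validation, so the second setField call silently replaces the integer value with a string; the mismatch only surfaces when ParquetFileWriter binds the record against the Iceberg schema during write(). A minimal demonstration of that setField behavior:

// setField accepts any Object regardless of the declared column type;
// type errors are deferred until the record is actually written.
Schema schema = new Schema(Types.NestedField.optional(1, "int_field", Types.IntegerType.get()));
Record record = GenericRecord.create(schema);
record.setField("int_field", "abc");                           // accepted without complaint
System.out.println(record.getField("int_field").getClass());   // class java.lang.String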