Search in sources:

Example 1 with Record

Use of org.apache.iceberg.data.Record in project drill by apache.

In class InputDataTransformer, method getPartition:

/**
 * Builds a {@link Record} holding the partition values extracted from the given
 * Metastore component unit via the supplied getter handles.
 *
 * @param unit specific Metastore component unit
 * @param schema partition schema
 * @param unitGetters specific Metastore component unit getters
 * @return {@link Record} with partition values
 * @throws IcebergMetastoreException if getter to partition column is absent or
 *         partition column value is null
 */
private Record getPartition(T unit, Schema schema, Map<String, MethodHandle> unitGetters) {
    Record partition = GenericRecord.create(schema);
    for (Types.NestedField field : schema.columns()) {
        String fieldName = field.name();
        MethodHandle getter = unitGetters.get(fieldName);
        // every partition column must have a corresponding getter on the unit class
        if (getter == null) {
            throw new IcebergMetastoreException(String.format("Getter for partition key [%s::%s] must be declared in [%s] class", fieldName, field.type(), unit.getClass().getSimpleName()));
        }
        Object fieldValue;
        try {
            fieldValue = getter.invoke(unit);
        } catch (Throwable e) {
            // MethodHandle#invoke declares Throwable, hence the broad catch
            throw new IcebergMetastoreException(String.format("Unable to invoke getter for column [%s] using [%s]", fieldName, getter), e);
        }
        // partition key values are mandatory; null would make the partition ambiguous
        if (fieldValue == null) {
            throw new IcebergMetastoreException(String.format("Partition key [%s::%s] value must be set", fieldName, field.type()));
        }
        partition.setField(fieldName, fieldValue);
    }
    return partition;
}
Also used : IcebergMetastoreException(org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException) Types(org.apache.iceberg.types.Types) Record(org.apache.iceberg.data.Record) GenericRecord(org.apache.iceberg.data.GenericRecord) MethodHandle(java.lang.invoke.MethodHandle)

Example 2 with Record

Use of org.apache.iceberg.data.Record in project drill by apache.

In class InputDataTransformer, method execute:

/**
 * Converts all configured units into Iceberg records matching the table schema
 * and computes the partition record they share.
 *
 * @return records to write along with their common partition
 *         ({@code null} partition when there are no units)
 * @throws IcebergMetastoreException if units map to more than one partition
 *         or a getter invocation fails
 */
public WriteData execute() {
    List<Record> unitRecords = new ArrayList<>();
    Set<Record> partitionKeys = new HashSet<>();
    for (T unit : units) {
        partitionKeys.add(getPartition(unit, partitionSpecSchema, unitGetters));
        Record unitRecord = GenericRecord.create(tableSchema);
        for (Types.NestedField field : tableSchema.columns()) {
            String fieldName = field.name();
            MethodHandle getter = unitGetters.get(fieldName);
            if (getter == null) {
                // ignore absent getters
                continue;
            }
            try {
                unitRecord.setField(fieldName, getter.invoke(unit));
            } catch (Throwable e) {
                // MethodHandle#invoke declares Throwable, hence the broad catch
                throw new IcebergMetastoreException(String.format("Unable to invoke getter for column [%s] using [%s]", fieldName, getter), e);
            }
        }
        unitRecords.add(unitRecord);
    }
    // all units written together must land in exactly one partition
    if (partitionKeys.size() > 1) {
        throw new IcebergMetastoreException(String.format("Partition keys values must be the same for all records in the partition. " + "Partition schema: [%s]. Received partition values: %s", partitionSpecSchema, partitionKeys));
    }
    return new WriteData(unitRecords, partitionKeys.isEmpty() ? null : partitionKeys.iterator().next());
}
Also used : IcebergMetastoreException(org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException) Types(org.apache.iceberg.types.Types) ArrayList(java.util.ArrayList) Record(org.apache.iceberg.data.Record) GenericRecord(org.apache.iceberg.data.GenericRecord) HashSet(java.util.HashSet) MethodHandle(java.lang.invoke.MethodHandle)

Example 3 with Record

Use of org.apache.iceberg.data.Record in project drill by apache.

In class TestTablesInputDataTransformer, method testValidDataSeveralRecords:

/**
 * Verifies that several metadata units differing only in their column value are
 * transformed into matching Iceberg records and share one partition record.
 */
@Test
public void testValidDataSeveralRecords() {
    List<TableMetadataUnit> units = Arrays.asList(unitWithColumn("a"), unitWithColumn("b"), unitWithColumn("c"));
    WriteData writeData = new InputDataTransformer<TableMetadataUnit>(metastoreSchema, partitionSchema, unitGetters).units(units).execute();
    Record expectedPartition = GenericRecord.create(partitionSchema);
    expectedPartition.setField("storagePlugin", "dfs");
    expectedPartition.setField("workspace", "tmp");
    expectedPartition.setField("tableName", "nation");
    expectedPartition.setField("metadataKey", MetadataInfo.GENERAL_INFO_KEY);
    assertEquals(Arrays.asList(expectedRecord("a"), expectedRecord("b"), expectedRecord("c")), writeData.records());
    assertEquals(expectedPartition, writeData.partition());
}

/** Builds a metadata unit with the shared test attributes and the given column. */
private TableMetadataUnit unitWithColumn(String column) {
    return TableMetadataUnit.builder().storagePlugin("dfs").workspace("tmp").tableName("nation").metadataKey(MetadataInfo.GENERAL_INFO_KEY).column(column).build();
}

/** Builds the Iceberg record expected for a unit with the given column. */
private Record expectedRecord(String column) {
    Record record = GenericRecord.create(metastoreSchema);
    record.setField("storagePlugin", "dfs");
    record.setField("workspace", "tmp");
    record.setField("tableName", "nation");
    record.setField("metadataKey", MetadataInfo.GENERAL_INFO_KEY);
    record.setField("column", column);
    return record;
}
Also used : InputDataTransformer(org.apache.drill.metastore.iceberg.transform.InputDataTransformer) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) Record(org.apache.iceberg.data.Record) GenericRecord(org.apache.iceberg.data.GenericRecord) WriteData(org.apache.drill.metastore.iceberg.transform.WriteData) Test(org.junit.Test) IcebergBaseTest(org.apache.drill.metastore.iceberg.IcebergBaseTest)

Example 4 with Record

Use of org.apache.iceberg.data.Record in project drill by apache.

In class TestTablesOutputDataTransformer, method testInvalidColumns:

/**
 * Verifies that requesting columns absent from the record produces a unit
 * with no fields populated rather than an error.
 */
@Test
public void testInvalidColumns() {
    Record inputRecord = GenericRecord.create(schema);
    inputRecord.setField("tableName", "a");
    // "a" and "b" do not exist in the record, so no setters should fire
    List<TableMetadataUnit> actualResult = new TablesOutputDataTransformer(unitSetters).records(Collections.singletonList(inputRecord)).columns(Arrays.asList("a", "b")).execute();
    List<TableMetadataUnit> expectedResult = Collections.singletonList(TableMetadataUnit.builder().build());
    assertEquals(expectedResult, actualResult);
}
Also used : TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) Record(org.apache.iceberg.data.Record) GenericRecord(org.apache.iceberg.data.GenericRecord) Test(org.junit.Test) IcebergBaseTest(org.apache.drill.metastore.iceberg.IcebergBaseTest)

Example 5 with Record

Use of org.apache.iceberg.data.Record in project drill by apache.

In class TestParquetFileWriter, method testTypeMismatch:

/**
 * Verifies that writing a record whose field value does not match the declared
 * column type fails with {@link IcebergMetastoreException}.
 */
@Test
public void testTypeMismatch() throws Exception {
    Schema intSchema = new Schema(Types.NestedField.optional(1, "int_field", Types.IntegerType.get()));
    Record mismatched = GenericRecord.create(intSchema);
    mismatched.setField("int_field", 1);
    // overwrite the valid int with a String to provoke the type mismatch on write
    mismatched.setField("int_field", "abc");
    String location = defaultFolder.newFolder("testTypeMismatch").toURI().getPath();
    Table table = tables.create(intSchema, location);
    thrown.expect(IcebergMetastoreException.class);
    new ParquetFileWriter(table).records(Collections.singletonList(mismatched)).location(location).name("typeMismatch").write();
}
Also used : Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) GenericRecord(org.apache.iceberg.data.GenericRecord) Record(org.apache.iceberg.data.Record) IcebergBaseTest(org.apache.drill.metastore.iceberg.IcebergBaseTest) Test(org.junit.Test)

Aggregations

Record (org.apache.iceberg.data.Record)114 Test (org.junit.Test)99 Schema (org.apache.iceberg.Schema)68 Table (org.apache.iceberg.Table)51 GenericRecord (org.apache.iceberg.data.GenericRecord)51 PartitionSpec (org.apache.iceberg.PartitionSpec)19 ArrayList (java.util.ArrayList)14 List (java.util.List)13 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)12 HashMap (java.util.HashMap)11 IcebergBaseTest (org.apache.drill.metastore.iceberg.IcebergBaseTest)11 TestHelper (org.apache.iceberg.mr.TestHelper)11 ImmutableList (org.apache.iceberg.relocated.com.google.common.collect.ImmutableList)10 Types (org.apache.iceberg.types.Types)10 Map (java.util.Map)9 IOException (java.io.IOException)8 ImmutableMap (org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap)8 FileFormat (org.apache.iceberg.FileFormat)7 DeleteFile (org.apache.iceberg.DeleteFile)6 NestedField.optional (org.apache.iceberg.types.Types.NestedField.optional)6