Search in sources:

Example 1 with Table

Use of org.apache.iceberg.Table in project drill by apache.

In class IcebergMetastore, method updateTableProperties:

/**
 * Checks config table properties against current table properties.
 * Adds properties missing from the table, updates properties whose values
 * differ and removes properties no longer present in the config.
 * If both sets of properties are equal, does nothing.
 *
 * @param table Iceberg table instance
 * @param tableProperties table properties from the config
 */
private void updateTableProperties(Table table, Map<String, String> tableProperties) {
    Map<String, String> currentProperties = table.properties();
    MapDifference<String, String> difference = Maps.difference(tableProperties, currentProperties);
    if (difference.areEqual()) {
        return;
    }
    UpdateProperties updateProperties = table.updateProperties();
    // collect properties whose values differ, taking the config (left) value
    Map<String, String> propertiesToUpdate = difference.entriesDiffering().entrySet().stream()
        .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().leftValue(), (o, n) -> n));
    // add new properties
    propertiesToUpdate.putAll(difference.entriesOnlyOnLeft());
    logger.debug("Updating Iceberg table [{}] properties: {}", table.location(), updateProperties);
    propertiesToUpdate.forEach(updateProperties::set);
    logger.debug("Removing Iceberg table [{}] properties: {}", table.location(), difference.entriesOnlyOnRight());
    difference.entriesOnlyOnRight().keySet().forEach(updateProperties::remove);
    updateProperties.commit();
}
Also used: AlreadyExistsException(org.apache.iceberg.exceptions.AlreadyExistsException) IcebergTables(org.apache.drill.metastore.iceberg.components.tables.IcebergTables) FileSystem(org.apache.hadoop.fs.FileSystem) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) IcebergConfigConstants(org.apache.drill.metastore.iceberg.config.IcebergConfigConstants) Maps(org.apache.drill.shaded.guava.com.google.common.collect.Maps) IcebergMetastoreException(org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException) Configuration(org.apache.hadoop.conf.Configuration) NoSuchTableException(org.apache.iceberg.exceptions.NoSuchTableException) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) IcebergTableSchema(org.apache.drill.metastore.iceberg.schema.IcebergTableSchema) Metastore(org.apache.drill.metastore.Metastore) MapDifference(org.apache.drill.shaded.guava.com.google.common.collect.MapDifference) Logger(org.slf4j.Logger) Views(org.apache.drill.metastore.components.views.Views) Config(com.typesafe.config.Config) Table(org.apache.iceberg.Table) HadoopTables(org.apache.iceberg.hadoop.HadoopTables) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) UpdateProperties(org.apache.iceberg.UpdateProperties) DrillConfig(org.apache.drill.common.config.DrillConfig) Tables(org.apache.drill.metastore.components.tables.Tables) Collections(java.util.Collections)
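The reconciliation above hinges on the three views exposed by Guava's Maps.difference. A minimal standalone sketch of their semantics (plain Guava here; Drill uses its shaded copy, and the class name is illustrative):

import com.google.common.collect.MapDifference;
import com.google.common.collect.Maps;
import java.util.Map;

public class MapDifferenceDemo {
    public static void main(String[] args) {
        Map<String, String> config = Map.of("a", "1", "b", "2");
        Map<String, String> current = Map.of("b", "3", "c", "4");
        MapDifference<String, String> diff = Maps.difference(config, current);
        // {a=1} -> only in the config, so set on the table
        System.out.println(diff.entriesOnlyOnLeft());
        // {b=(2, 3)} -> values differ; leftValue() "2" from the config wins
        System.out.println(diff.entriesDiffering());
        // {c=4} -> only on the table, so removed
        System.out.println(diff.entriesOnlyOnRight());
    }
}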

Example 2 with Table

Use of org.apache.iceberg.Table in project drill by apache.

In class TestParquetFileWriter, method testTypeMismatch:

@Test
public void testTypeMismatch() throws Exception {
    Schema schema = new Schema(Types.NestedField.optional(1, "int_field", Types.IntegerType.get()));
    Record record = GenericRecord.create(schema);
    record.setField("int_field", 1);
    record.setField("int_field", "abc");
    String location = defaultFolder.newFolder("testTypeMismatch").toURI().getPath();
    Table table = tables.create(schema, location);
    thrown.expect(IcebergMetastoreException.class);
    new ParquetFileWriter(table)
        .records(Collections.singletonList(record))
        .location(location)
        .name("typeMismatch")
        .write();
}
Also used: Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) GenericRecord(org.apache.iceberg.data.GenericRecord) Record(org.apache.iceberg.data.Record) IcebergBaseTest(org.apache.drill.metastore.iceberg.IcebergBaseTest) Test(org.junit.Test)
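The exception surfaces only at write() because GenericRecord holds field values as untyped Objects; setField performs no schema validation. A small sketch of that behavior (class name is illustrative):

import org.apache.iceberg.Schema;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.types.Types;

public class LazyTypingDemo {
    public static void main(String[] args) {
        Schema schema = new Schema(
            Types.NestedField.optional(1, "int_field", Types.IntegerType.get()));
        Record record = GenericRecord.create(schema);
        // accepted without complaint: the value is stored as a plain Object
        record.setField("int_field", "abc");
        // prints "abc"; nothing has checked the declared int type yet
        System.out.println(record.getField("int_field"));
    }
}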

Example 3 with Table

Use of org.apache.iceberg.Table in project drill by apache.

In class TestParquetFileWriter, method testAllTypes:

@Test
public void testAllTypes() throws Exception {
    Schema schema = new Schema(
        Types.NestedField.optional(1, "int_field", Types.IntegerType.get()),
        Types.NestedField.optional(2, "long_field", Types.LongType.get()),
        Types.NestedField.optional(3, "float_field", Types.FloatType.get()),
        Types.NestedField.optional(4, "double_field", Types.DoubleType.get()),
        Types.NestedField.optional(5, "string_field", Types.StringType.get()),
        Types.NestedField.optional(6, "boolean_field", Types.BooleanType.get()),
        Types.NestedField.optional(7, "list_field",
            Types.ListType.ofOptional(9, Types.StringType.get())),
        Types.NestedField.optional(8, "map_field",
            Types.MapType.ofOptional(10, 11, Types.StringType.get(), Types.FloatType.get())));
    List<String> listValue = Arrays.asList("a", "b", "c");
    Map<String, Float> mapValue = new HashMap<>();
    mapValue.put("a", 0.1F);
    mapValue.put("b", 0.2F);
    Record record = GenericRecord.create(schema);
    record.setField("int_field", 1);
    record.setField("long_field", 100L);
    record.setField("float_field", 0.5F);
    record.setField("double_field", 1.5D);
    record.setField("string_field", "abc");
    record.setField("boolean_field", true);
    record.setField("list_field", listValue);
    record.setField("map_field", mapValue);
    String location = defaultFolder.newFolder("testAllTypes").toURI().getPath();
    String fileName = "allTypes";
    Table table = tables.create(schema, location);
    org.apache.drill.metastore.iceberg.write.File result = new ParquetFileWriter(table)
        .records(Collections.singletonList(record))
        .location(location)
        .name(fileName)
        .write();
    String writePath = new Path(location, FileFormat.PARQUET.addExtension(fileName)).toUri().getPath();
    // addExtension is idempotent (it leaves a name that already carries the
    // extension unchanged), so the second call on writePath is a no-op
    assertEquals(new Path(FileFormat.PARQUET.addExtension(writePath)), new Path(result.location()));
    assertEquals(Long.valueOf(1), result.metrics().recordCount());
    List<Record> rows = readData(result.input(), schema);
    assertEquals(1, rows.size());
    Record row = rows.get(0);
    assertEquals(1, row.getField("int_field"));
    assertEquals(100L, row.getField("long_field"));
    assertEquals(0.5F, row.getField("float_field"));
    assertEquals(1.5D, row.getField("double_field"));
    assertEquals("abc", row.getField("string_field"));
    assertEquals(true, row.getField("boolean_field"));
    assertEquals(listValue, row.getField("list_field"));
    assertEquals(mapValue, row.getField("map_field"));
}
Also used: Path(org.apache.hadoop.fs.Path) Table(org.apache.iceberg.Table) HashMap(java.util.HashMap) Schema(org.apache.iceberg.Schema) GenericRecord(org.apache.iceberg.data.GenericRecord) Record(org.apache.iceberg.data.Record) IcebergBaseTest(org.apache.drill.metastore.iceberg.IcebergBaseTest) Test(org.junit.Test)
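The readData helper used above is not shown in the snippet. Judging from the imports these tests pull in (Parquet, GenericParquetReaders, CloseableIterable, InputFile), it plausibly reads the rows back through Iceberg's generic Parquet reader, roughly:

// a plausible sketch of the helper, not the verbatim Drill source
private static List<Record> readData(InputFile inputFile, Schema schema) throws IOException {
    try (CloseableIterable<Record> reader = Parquet.read(inputFile)
            .project(schema)
            .createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(schema, fileSchema))
            .build()) {
        return Lists.newArrayList(reader);
    }
}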

Example 4 with Table

Use of org.apache.iceberg.Table in project drill by apache.

In class TestParquetFileWriter, method testSeveralRecords:

@Test
public void testSeveralRecords() throws Exception {
    int fieldIndex = 1;
    Schema schema = new Schema(Types.NestedField.optional(fieldIndex, "int_field", Types.IntegerType.get()));
    List<Integer> values = Arrays.asList(1, 2, 3, 3, null, null, null);
    List<Record> records = values.stream().map(value -> {
        Record record = GenericRecord.create(schema);
        record.setField("int_field", value);
        return record;
    }).collect(Collectors.toList());
    String location = defaultFolder.newFolder("testSeveralRecords").toURI().getPath();
    Table table = tables.create(schema, location);
    org.apache.drill.metastore.iceberg.write.File result = new ParquetFileWriter(table)
        .records(records)
        .location(location)
        .name("severalRecords")
        .write();
    assertEquals(Long.valueOf(7), result.metrics().recordCount());
    assertEquals(Long.valueOf(7), result.metrics().valueCounts().get(fieldIndex));
    assertEquals(Long.valueOf(3), result.metrics().nullValueCounts().get(fieldIndex));
    List<Record> rows = readData(result.input(), schema);
    assertEquals(7, rows.size());
    List<Integer> actual = rows.stream().map(row -> (Integer) row.getField("int_field")).collect(Collectors.toList());
    assertEquals(values, actual);
}
Also used: Tables(org.apache.iceberg.Tables) Arrays(java.util.Arrays) Types(org.apache.iceberg.types.Types) BeforeClass(org.junit.BeforeClass) HashMap(java.util.HashMap) IcebergMetastoreException(org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException) IcebergBaseTest(org.apache.drill.metastore.iceberg.IcebergBaseTest) GenericRecord(org.apache.iceberg.data.GenericRecord) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) CloseableIterable(org.apache.iceberg.io.CloseableIterable) Files(java.nio.file.Files) Table(org.apache.iceberg.Table) HadoopTables(org.apache.iceberg.hadoop.HadoopTables) Parquet(org.apache.iceberg.parquet.Parquet) Test(org.junit.Test) IOException(java.io.IOException) Schema(org.apache.iceberg.Schema) FileFormat(org.apache.iceberg.FileFormat) Collectors(java.util.stream.Collectors) File(java.io.File) Record(org.apache.iceberg.data.Record) List(java.util.List) Lists(org.apache.drill.shaded.guava.com.google.common.collect.Lists) Assert.assertNull(org.junit.Assert.assertNull) Paths(java.nio.file.Paths) GenericParquetReaders(org.apache.iceberg.data.parquet.GenericParquetReaders) InputFile(org.apache.iceberg.io.InputFile) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals)
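The shared tables fixture the tests create tables through is likewise outside the snippets. Given the Tables, HadoopTables and BeforeClass imports above, a minimal sketch of such a fixture (whether the Configuration comes from IcebergBaseTest or is built fresh is an assumption):

private static Tables tables;

@BeforeClass
public static void init() {
    // HadoopTables creates and loads tables directly from filesystem
    // locations, no catalog involved
    tables = new HadoopTables(new Configuration());
}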

Example 5 with Table

Use of org.apache.iceberg.Table in project presto by prestodb.

In class IcebergHadoopMetadata, method getColumnHandles:

@Override
public Map<String, ColumnHandle> getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) {
    IcebergTableHandle table = (IcebergTableHandle) tableHandle;
    Table icebergTable = getHadoopIcebergTable(resourceFactory, session, table.getSchemaTableName());
    return getColumns(icebergTable.schema(), typeManager).stream()
            .collect(toImmutableMap(IcebergColumnHandle::getName, identity()));
}
Also used: SystemTable(com.facebook.presto.spi.SystemTable) IcebergUtil.getHadoopIcebergTable(com.facebook.presto.iceberg.IcebergUtil.getHadoopIcebergTable) Table(org.apache.iceberg.Table)
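getColumns is Presto-side code, but the Iceberg half of the contract is just Table.schema() and Schema.columns(). A hypothetical sketch of walking the top-level columns, with printed output standing in for real IcebergColumnHandle construction:

// hypothetical helper, not Presto's actual mapping
static void printColumns(Table icebergTable) {
    for (Types.NestedField field : icebergTable.schema().columns()) {
        System.out.printf("%d: %s (%s, %s)%n",
            field.fieldId(), field.name(), field.type(),
            field.isOptional() ? "optional" : "required");
    }
}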

Aggregations

Table (org.apache.iceberg.Table): 188
Test (org.junit.Test): 132
Schema (org.apache.iceberg.Schema): 66
TableIdentifier (org.apache.iceberg.catalog.TableIdentifier): 56
Record (org.apache.iceberg.data.Record): 56
PartitionSpec (org.apache.iceberg.PartitionSpec): 51
IOException (java.io.IOException): 27
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 27
List (java.util.List): 22
Map (java.util.Map): 20
DataFile (org.apache.iceberg.DataFile): 19
NoSuchTableException (org.apache.iceberg.exceptions.NoSuchTableException): 19
Collectors (java.util.stream.Collectors): 18
BaseTable (org.apache.iceberg.BaseTable): 18
Types (org.apache.iceberg.types.Types): 18
Properties (java.util.Properties): 17
Configuration (org.apache.hadoop.conf.Configuration): 17
Path (org.apache.hadoop.fs.Path): 17
FileFormat (org.apache.iceberg.FileFormat): 16
ArrayList (java.util.ArrayList): 15