
Example 16 with Table

Use of org.apache.iceberg.Table in project hive by apache.

Class HiveCreateReplaceTableTest, method testReplaceTableTxnTableModifiedConcurrently.

@Test
public void testReplaceTableTxnTableModifiedConcurrently() {
    Table table = catalog.createTable(TABLE_IDENTIFIER, SCHEMA, SPEC, tableLocation, Maps.newHashMap());
    Assert.assertTrue("Table should exist", catalog.tableExists(TABLE_IDENTIFIER));
    Transaction txn = catalog.newReplaceTableTransaction(TABLE_IDENTIFIER, SCHEMA, SPEC, false);
    // update the table concurrently
    table.updateProperties().set("another-prop", "another-value").commit();
    txn.updateProperties().set("prop", "value").commit();
    txn.commitTransaction();
    // the replace should still succeed, and the concurrent property update should be overwritten
    table = catalog.loadTable(TABLE_IDENTIFIER);
    Assert.assertNull("Concurrently set property should be lost after replace", table.properties().get("another-prop"));
    Assert.assertEquals("Table props should match", "value", table.properties().get("prop"));
}
Also used : Table(org.apache.iceberg.Table) Transaction(org.apache.iceberg.Transaction) Test(org.junit.Test)
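
The fourth argument to newReplaceTableTransaction is the orCreate flag; the test passes false, so the replace fails if the table does not exist. A minimal hedged sketch of the create-or-replace variant, reusing the fixture names from the test above (catalog, TABLE_IDENTIFIER, SCHEMA, SPEC):

// Sketch: orCreate = true starts the transaction even if the table does not exist yet.
Transaction txn = catalog.newReplaceTableTransaction(TABLE_IDENTIFIER, SCHEMA, SPEC, true);
// changes are staged on the transaction and become visible only on commitTransaction()
txn.updateProperties().set("prop", "value").commit();
txn.commitTransaction();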

Example 17 with Table

Use of org.apache.iceberg.Table in project hive by apache.

Class HiveIcebergMetaHook, method getCatalogProperties.

/**
 * Calculates the properties we would like to send to the catalog.
 * <ul>
 * <li>The base of the properties is the set of properties stored in the Hive Metastore for the given table
 * <li>We add the {@link Catalogs#LOCATION} as the table location
 * <li>We add the {@link Catalogs#NAME} as the TableIdentifier defined by the database name and the table name
 * <li>We add the serde properties of the HMS table
 * <li>We remove the parameters that we do not want to push down to the Iceberg table properties
 * </ul>
 * @param hmsTable Table for which we are calculating the properties
 * @return The properties we can provide for Iceberg functions, like {@link Catalogs}
 */
private static Properties getCatalogProperties(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
    Properties properties = new Properties();
    hmsTable.getParameters().entrySet().stream().filter(e -> e.getKey() != null && e.getValue() != null).forEach(e -> {
        // translate key names between HMS and Iceberg where needed
        String icebergKey = HiveTableOperations.translateToIcebergProp(e.getKey());
        properties.put(icebergKey, e.getValue());
    });
    if (properties.get(Catalogs.LOCATION) == null && hmsTable.getSd() != null && hmsTable.getSd().getLocation() != null) {
        properties.put(Catalogs.LOCATION, hmsTable.getSd().getLocation());
    }
    if (properties.get(Catalogs.NAME) == null) {
        properties.put(Catalogs.NAME, TableIdentifier.of(hmsTable.getDbName(), hmsTable.getTableName()).toString());
    }
    SerDeInfo serdeInfo = hmsTable.getSd() != null ? hmsTable.getSd().getSerdeInfo() : null;
    if (serdeInfo != null) {
        serdeInfo.getParameters().entrySet().stream().filter(e -> e.getKey() != null && e.getValue() != null).forEach(e -> {
            String icebergKey = HiveTableOperations.translateToIcebergProp(e.getKey());
            properties.put(icebergKey, e.getValue());
        });
    }
    // Remove HMS table parameters we don't want to propagate to Iceberg
    PROPERTIES_TO_REMOVE.forEach(properties::remove);
    return properties;
}
Also used : PartitionSpecProxy(org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) CatalogUtil(org.apache.iceberg.CatalogUtil) UpdateSchema(org.apache.iceberg.UpdateSchema) FileSystem(org.apache.hadoop.fs.FileSystem) HiveSchemaUtil(org.apache.iceberg.hive.HiveSchemaUtil) Catalogs(org.apache.iceberg.mr.Catalogs) LoggerFactory(org.slf4j.LoggerFactory) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) TableMetadata(org.apache.iceberg.TableMetadata) DeleteFiles(org.apache.iceberg.DeleteFiles) Lists(org.apache.iceberg.relocated.com.google.common.collect.Lists) AlterTableType(org.apache.hadoop.hive.ql.ddl.table.AlterTableType) NameMapping(org.apache.iceberg.mapping.NameMapping) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) NoSuchTableException(org.apache.iceberg.exceptions.NoSuchTableException) Path(org.apache.hadoop.fs.Path) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) Splitter(org.apache.iceberg.relocated.com.google.common.base.Splitter) EnumSet(java.util.EnumSet) TableMetadataParser(org.apache.iceberg.TableMetadataParser) MetaStoreUtils(org.apache.hadoop.hive.metastore.utils.MetaStoreUtils) BaseTable(org.apache.iceberg.BaseTable) Collection(java.util.Collection) HiveMetaHook(org.apache.hadoop.hive.metastore.HiveMetaHook) InputFormatConfig(org.apache.iceberg.mr.InputFormatConfig) Set(java.util.Set) ImmutableList(org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) Schema(org.apache.iceberg.Schema) PartitionSpecParser(org.apache.iceberg.PartitionSpecParser) SchemaParser(org.apache.iceberg.SchemaParser) Objects(java.util.Objects) Type(org.apache.iceberg.types.Type) List(java.util.List) UpdateProperties(org.apache.iceberg.UpdateProperties) PartitionSpec(org.apache.iceberg.PartitionSpec) Optional(java.util.Optional) TableProperties(org.apache.iceberg.TableProperties) SessionStateUtil(org.apache.hadoop.hive.ql.session.SessionStateUtil) Expressions(org.apache.iceberg.expressions.Expressions) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) TypeInfoUtils(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils) ImmutableSet(org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet) ImmutableMap(org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap) Pair(org.apache.iceberg.util.Pair) MappingUtil(org.apache.iceberg.mapping.MappingUtil) BaseMetastoreTableOperations(org.apache.iceberg.BaseMetastoreTableOperations) UpdatePartitionSpec(org.apache.iceberg.UpdatePartitionSpec) TableName(org.apache.hadoop.hive.common.TableName) PartitionTransformSpec(org.apache.hadoop.hive.ql.parse.PartitionTransformSpec) Properties(java.util.Properties) Logger(org.slf4j.Logger) TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) NameMappingParser(org.apache.iceberg.mapping.NameMappingParser) IOException(java.io.IOException) FileFormat(org.apache.iceberg.FileFormat) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Transaction(org.apache.iceberg.Transaction) Preconditions(org.apache.iceberg.relocated.com.google.common.base.Preconditions) FileIO(org.apache.iceberg.io.FileIO) Collections(java.util.Collections) org.apache.hadoop.hive.metastore.api.hive_metastoreConstants(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants) PartitionTransform(org.apache.hadoop.hive.ql.parse.PartitionTransform) 
HiveTableOperations(org.apache.iceberg.hive.HiveTableOperations)
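
The Properties returned here have the shape the Catalogs utility consumes, for example in Catalogs.loadTable. A minimal sketch, assuming illustrative placeholder values for the table name and location:

// Sketch: feeding catalog properties into Catalogs.loadTable.
Properties props = new Properties();
props.put(Catalogs.NAME, "default.my_table");                             // placeholder TableIdentifier string
props.put(Catalogs.LOCATION, "hdfs://namenode:8020/warehouse/my_table"); // placeholder location
Table table = Catalogs.loadTable(new Configuration(), props);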

Example 18 with Table

Use of org.apache.iceberg.Table in project hive by apache.

Class HiveTableTest, method testListTables.

@Test
public void testListTables() throws TException, IOException {
    List<TableIdentifier> tableIdents = catalog.listTables(TABLE_IDENTIFIER.namespace());
    // only the pre-created Iceberg table should match DB_NAME.TABLE_NAME
    List<TableIdentifier> matchingIdents = tableIdents.stream().filter(t -> t.namespace().level(0).equals(DB_NAME) && t.name().equals(TABLE_NAME)).collect(Collectors.toList());
    Assert.assertEquals(1, matchingIdents.size());
    Assert.assertTrue(catalog.tableExists(TABLE_IDENTIFIER));
    // create a plain (non-Iceberg) Hive table in the same database
    String hiveTableName = "test_hive_table";
    org.apache.hadoop.hive.metastore.api.Table hiveTable = createHiveTable(hiveTableName);
    metastoreClient.createTable(hiveTable);
    List<TableIdentifier> tableIdents1 = catalog.listTables(TABLE_IDENTIFIER.namespace());
    Assert.assertEquals("Listing should still return only the 1 Iceberg table", 1, tableIdents1.size());
    Assert.assertTrue(catalog.tableExists(TABLE_IDENTIFIER));
    metastoreClient.dropTable(DB_NAME, hiveTableName);
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Types(org.apache.iceberg.types.Types) NestedField.optional(org.apache.iceberg.types.Types.NestedField.optional) ICEBERG_TABLE_TYPE_VALUE(org.apache.iceberg.BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) TABLE_TYPE_PROP(org.apache.iceberg.BaseMetastoreTableOperations.TABLE_TYPE_PROP) Lists(org.apache.iceberg.relocated.com.google.common.collect.Lists) DataFiles(org.apache.iceberg.DataFiles) Files.createTempDirectory(java.nio.file.Files.createTempDirectory) Map(java.util.Map) DataFile(org.apache.iceberg.DataFile) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) CommitFailedException(org.apache.iceberg.exceptions.CommitFailedException) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) HasTableOperations(org.apache.iceberg.HasTableOperations) Schema(org.apache.iceberg.Schema) Collectors(java.util.stream.Collectors) List(java.util.List) AvroSchemaUtil(org.apache.iceberg.avro.AvroSchemaUtil) PartitionSpec(org.apache.iceberg.PartitionSpec) TableProperties(org.apache.iceberg.TableProperties) GenericData(org.apache.hive.iceberg.org.apache.avro.generic.GenericData) NotFoundException(org.apache.iceberg.exceptions.NotFoundException) PosixFilePermissions.fromString(java.nio.file.attribute.PosixFilePermissions.fromString) GenericRecordBuilder(org.apache.hive.iceberg.org.apache.avro.generic.GenericRecordBuilder) HashMap(java.util.HashMap) ManifestFile(org.apache.iceberg.ManifestFile) Avro(org.apache.iceberg.avro.Avro) Namespace(org.apache.iceberg.catalog.Namespace) Files(org.apache.iceberg.Files) FileAppender(org.apache.iceberg.io.FileAppender) PosixFilePermissions.asFileAttribute(java.nio.file.attribute.PosixFilePermissions.asFileAttribute) Table(org.apache.iceberg.Table) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Maps(org.apache.iceberg.relocated.com.google.common.collect.Maps) TException(org.apache.thrift.TException) IOException(java.io.IOException) Test(org.junit.Test) File(java.io.File) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Rule(org.junit.Rule) NestedField.required(org.apache.iceberg.types.Types.NestedField.required) ConfigProperties(org.apache.iceberg.hadoop.ConfigProperties) TableType(org.apache.hadoop.hive.metastore.TableType) METADATA_LOCATION_PROP(org.apache.iceberg.BaseMetastoreTableOperations.METADATA_LOCATION_PROP) Assert(org.junit.Assert) Collections(java.util.Collections) org.apache.hadoop.hive.metastore.api.hive_metastoreConstants(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants) TemporaryFolder(org.junit.rules.TemporaryFolder)
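
listTables returns only the Iceberg table because HiveCatalog filters HMS tables on the table_type parameter. A hedged sketch of that check, using the BaseMetastoreTableOperations constants imported above; hiveTable stands for the HMS table created in the test:

// Sketch: an HMS table counts as an Iceberg table when its table_type
// parameter matches "iceberg" (compared case-insensitively).
String tableType = hiveTable.getParameters().get(TABLE_TYPE_PROP);
boolean isIcebergTable = ICEBERG_TABLE_TYPE_VALUE.equalsIgnoreCase(tableType);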

Example 19 with Table

Use of org.apache.iceberg.Table in project hive by apache.

Class HiveTableTest, method testDropTable.

@Test
public void testDropTable() throws IOException {
    Table table = catalog.loadTable(TABLE_IDENTIFIER);
    GenericRecordBuilder recordBuilder = new GenericRecordBuilder(AvroSchemaUtil.convert(schema, "test"));
    List<GenericData.Record> records = Lists.newArrayList(recordBuilder.set("id", 1L).build(), recordBuilder.set("id", 2L).build(), recordBuilder.set("id", 3L).build());
    String location1 = table.location().replace("file:", "") + "/data/file1.avro";
    try (FileAppender<GenericData.Record> writer = Avro.write(Files.localOutput(location1)).schema(schema).named("test").build()) {
        for (GenericData.Record rec : records) {
            writer.add(rec);
        }
    }
    String location2 = table.location().replace("file:", "") + "/data/file2.avro";
    try (FileAppender<GenericData.Record> writer = Avro.write(Files.localOutput(location2)).schema(schema).named("test").build()) {
        for (GenericData.Record rec : records) {
            writer.add(rec);
        }
    }
    DataFile file1 = DataFiles.builder(table.spec()).withRecordCount(3).withPath(location1).withFileSizeInBytes(Files.localInput(location1).getLength()).build();
    DataFile file2 = DataFiles.builder(table.spec()).withRecordCount(3).withPath(location2).withFileSizeInBytes(Files.localInput(location2).getLength()).build();
    // add both data files
    table.newAppend().appendFile(file1).appendFile(file2).commit();
    // delete file2
    table.newDelete().deleteFile(file2.path()).commit();
    String manifestListLocation = table.currentSnapshot().manifestListLocation().replace("file:", "");
    List<ManifestFile> manifests = table.currentSnapshot().allManifests();
    Assert.assertTrue("Drop (table and data) should return true and drop the table", catalog.dropTable(TABLE_IDENTIFIER));
    Assert.assertFalse("Table should not exist", catalog.tableExists(TABLE_IDENTIFIER));
    Assert.assertFalse("Table data files should not exist", new File(location1).exists());
    Assert.assertFalse("Table data files should not exist", new File(location2).exists());
    Assert.assertFalse("Table manifest list files should not exist", new File(manifestListLocation).exists());
    for (ManifestFile manifest : manifests) {
        Assert.assertFalse("Table manifest files should not exist", new File(manifest.path().replace("file:", "")).exists());
    }
    Assert.assertFalse("Table metadata file should not exist", new File(((HasTableOperations) table).operations().current().metadataFileLocation().replace("file:", "")).exists());
}
Also used : Table(org.apache.iceberg.Table) PosixFilePermissions.fromString(java.nio.file.attribute.PosixFilePermissions.fromString) GenericData(org.apache.hive.iceberg.org.apache.avro.generic.GenericData) ManifestFile(org.apache.iceberg.ManifestFile) DataFile(org.apache.iceberg.DataFile) GenericRecordBuilder(org.apache.hive.iceberg.org.apache.avro.generic.GenericRecordBuilder) HasTableOperations(org.apache.iceberg.HasTableOperations) File(java.io.File) Test(org.junit.Test)
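
The single-argument dropTable used above purges by default; it is equivalent to passing purge = true explicitly. A one-line sketch of the two forms:

// Sketch: both calls drop the table and purge data, manifests and metadata files.
catalog.dropTable(TABLE_IDENTIFIER);       // purge defaults to true
catalog.dropTable(TABLE_IDENTIFIER, true); // explicit form, same effect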

Example 20 with Table

Use of org.apache.iceberg.Table in project hive by apache.

Class HiveTableTest, method testDropWithoutPurgeLeavesTableData.

@Test
public void testDropWithoutPurgeLeavesTableData() throws IOException {
    Table table = catalog.loadTable(TABLE_IDENTIFIER);
    GenericRecordBuilder recordBuilder = new GenericRecordBuilder(AvroSchemaUtil.convert(schema, "test"));
    List<GenericData.Record> records = Lists.newArrayList(recordBuilder.set("id", 1L).build(), recordBuilder.set("id", 2L).build(), recordBuilder.set("id", 3L).build());
    String fileLocation = table.location().replace("file:", "") + "/data/file.avro";
    try (FileAppender<GenericData.Record> writer = Avro.write(Files.localOutput(fileLocation)).schema(schema).named("test").build()) {
        for (GenericData.Record rec : records) {
            writer.add(rec);
        }
    }
    DataFile file = DataFiles.builder(table.spec()).withRecordCount(3).withPath(fileLocation).withFileSizeInBytes(Files.localInput(fileLocation).getLength()).build();
    table.newAppend().appendFile(file).commit();
    String manifestListLocation = table.currentSnapshot().manifestListLocation().replace("file:", "");
    Assert.assertTrue("Drop should return true and drop the table", catalog.dropTable(TABLE_IDENTIFIER, false));
    Assert.assertFalse("Table should not exist", catalog.tableExists(TABLE_IDENTIFIER));
    Assert.assertTrue("Table data files should exist", new File(fileLocation).exists());
    Assert.assertTrue("Table metadata files should exist", new File(manifestListLocation).exists());
}
Also used : DataFile(org.apache.iceberg.DataFile) Table(org.apache.iceberg.Table) GenericRecordBuilder(org.apache.hive.iceberg.org.apache.avro.generic.GenericRecordBuilder) PosixFilePermissions.fromString(java.nio.file.attribute.PosixFilePermissions.fromString) GenericData(org.apache.hive.iceberg.org.apache.avro.generic.GenericData) ManifestFile(org.apache.iceberg.ManifestFile) File(java.io.File) Test(org.junit.Test)
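
Since a non-purging drop leaves the metadata files intact, the table can later be re-attached to a catalog. A hedged sketch using Catalog.registerTable; the metadata JSON path below is an illustrative placeholder, not taken from the test:

// Sketch: re-registering a table that was dropped with purge = false.
String metadataLocation = "file:/tmp/hivedb/tbl/metadata/00001-example.metadata.json"; // placeholder path
Table restored = catalog.registerTable(TABLE_IDENTIFIER, metadataLocation);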

Aggregations

Table (org.apache.iceberg.Table): 188
Test (org.junit.Test): 132
Schema (org.apache.iceberg.Schema): 66
TableIdentifier (org.apache.iceberg.catalog.TableIdentifier): 56
Record (org.apache.iceberg.data.Record): 56
PartitionSpec (org.apache.iceberg.PartitionSpec): 51
IOException (java.io.IOException): 27
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 27
List (java.util.List): 22
Map (java.util.Map): 20
DataFile (org.apache.iceberg.DataFile): 19
NoSuchTableException (org.apache.iceberg.exceptions.NoSuchTableException): 19
Collectors (java.util.stream.Collectors): 18
BaseTable (org.apache.iceberg.BaseTable): 18
Types (org.apache.iceberg.types.Types): 18
Properties (java.util.Properties): 17
Configuration (org.apache.hadoop.conf.Configuration): 17
Path (org.apache.hadoop.fs.Path): 17
FileFormat (org.apache.iceberg.FileFormat): 16
ArrayList (java.util.ArrayList): 15