
Example 41 with TableIdentifier

Use of org.apache.iceberg.catalog.TableIdentifier in project hive by apache.

From the class TestHiveIcebergSelects, method testSpecialCharacters.

@Test
public void testSpecialCharacters() {
    TableIdentifier table = TableIdentifier.of("default", "tar,! ,get");
    // note: the Chinese character seems to be accepted in the column name, but not
    // in the table name - this is the case for both Iceberg and standard Hive tables.
    shell.executeStatement(String.format("CREATE TABLE `%s` (id bigint, `dep,! 是,t` string) STORED BY ICEBERG STORED AS %s %s %s", table.name(), fileFormat, testTables.locationForCreateTableSQL(table), testTables.propertiesForCreateTableSQL(ImmutableMap.of())));
    shell.executeStatement(String.format("INSERT INTO `%s` VALUES (1, 'moon'), (2, 'star')", table.name()));
    List<Object[]> result = shell.executeStatement(String.format("SELECT `dep,! 是,t`, id FROM `%s` ORDER BY id", table.name()));
    Assert.assertEquals(2, result.size());
    Assert.assertArrayEquals(new Object[] { "moon", 1L }, result.get(0));
    Assert.assertArrayEquals(new Object[] { "star", 2L }, result.get(1));
}
Also used: TableIdentifier (org.apache.iceberg.catalog.TableIdentifier), Test (org.junit.Test)
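
Every example on this page starts from TableIdentifier.of(database, table). As a quick reference, here is a minimal, self-contained sketch of the identifier API; the values are illustrative, and only the Iceberg API module is assumed to be on the classpath.

import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.TableIdentifier;

public class TableIdentifierSketch {
    public static void main(String[] args) {
        // Build a two-part identifier: namespace (database) plus table name.
        TableIdentifier byParts = TableIdentifier.of("default", "tar,! ,get");

        // Equivalent identifier built from an explicit Namespace.
        TableIdentifier byNamespace = TableIdentifier.of(Namespace.of("default"), "tar,! ,get");

        System.out.println(byParts.namespace());          // default
        System.out.println(byParts.name());               // tar,! ,get
        System.out.println(byParts);                      // default.tar,! ,get
        System.out.println(byParts.equals(byNamespace));  // true
    }
}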

Example 42 with TableIdentifier

Use of org.apache.iceberg.catalog.TableIdentifier in project hive by apache.

From the class TestHiveIcebergStatistics, method testStatsWithInsert.

@Test
public void testStatsWithInsert() {
    TableIdentifier identifier = TableIdentifier.of("default", "customers");
    shell.setHiveSessionValue(HiveConf.ConfVars.HIVESTATSAUTOGATHER.varname, true);
    testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, PartitionSpec.unpartitioned(), fileFormat, ImmutableList.of());
    if (testTableType != TestTables.TestTableType.HIVE_CATALOG) {
        // If the location is set and we have to gather stats, then we have to update the table stats now
        shell.executeStatement("ANALYZE TABLE " + identifier + " COMPUTE STATISTICS FOR COLUMNS");
    }
    String insert = testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, identifier, false);
    shell.executeStatement(insert);
    checkColStat(identifier.name(), "customer_id", true);
    checkColStatMinMaxValue(identifier.name(), "customer_id", 0, 2);
    insert = testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS, identifier, false);
    shell.executeStatement(insert);
    checkColStat(identifier.name(), "customer_id", true);
    checkColStatMinMaxValue(identifier.name(), "customer_id", 0, 5);
}
Also used: TableIdentifier (org.apache.iceberg.catalog.TableIdentifier), Test (org.junit.Test)
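
The testTables.createTable(...) call above is a test fixture; with the plain Iceberg API, the same kind of unpartitioned table can be created directly through any configured Catalog. A minimal sketch, assuming an already-initialized catalog and an illustrative two-column schema (the real CUSTOMER_SCHEMA lives in HiveIcebergStorageHandlerTestUtils and may differ):

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.types.Types;

static Table createCustomersTable(Catalog catalog) {
    // Illustrative schema; field ids must be unique within the schema.
    Schema schema = new Schema(
        Types.NestedField.required(1, "customer_id", Types.LongType.get()),
        Types.NestedField.optional(2, "first_name", Types.StringType.get()));
    TableIdentifier identifier = TableIdentifier.of("default", "customers");
    // Unpartitioned spec, mirroring PartitionSpec.unpartitioned() in the test above.
    return catalog.createTable(identifier, schema, PartitionSpec.unpartitioned());
}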

Example 43 with TableIdentifier

Use of org.apache.iceberg.catalog.TableIdentifier in project hive by apache.

From the class TestCatalogs, method testCreateDropTableToCatalog.

@Test
public void testCreateDropTableToCatalog() throws IOException {
    TableIdentifier identifier = TableIdentifier.of("test", "table");
    String defaultCatalogName = "default";
    String warehouseLocation = temp.newFolder("hadoop", "warehouse").toString();
    setCustomCatalogProperties(defaultCatalogName, warehouseLocation);
    Properties missingSchema = new Properties();
    missingSchema.put("name", identifier.toString());
    missingSchema.put(InputFormatConfig.CATALOG_NAME, defaultCatalogName);
    AssertHelpers.assertThrows("Should complain about table schema not set", NullPointerException.class, "schema not set", () -> Catalogs.createTable(conf, missingSchema));
    Properties missingIdentifier = new Properties();
    missingIdentifier.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(SCHEMA));
    missingIdentifier.put(InputFormatConfig.CATALOG_NAME, defaultCatalogName);
    AssertHelpers.assertThrows("Should complain about table identifier not set", NullPointerException.class, "identifier not set", () -> Catalogs.createTable(conf, missingIdentifier));
    Properties properties = new Properties();
    properties.put("name", identifier.toString());
    properties.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(SCHEMA));
    properties.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(SPEC));
    properties.put("dummy", "test");
    properties.put(InputFormatConfig.CATALOG_NAME, defaultCatalogName);
    Catalogs.createTable(conf, properties);
    HadoopCatalog catalog = new CustomHadoopCatalog(conf, warehouseLocation);
    Table table = catalog.loadTable(identifier);
    Assert.assertEquals(SchemaParser.toJson(SCHEMA), SchemaParser.toJson(table.schema()));
    Assert.assertEquals(PartitionSpecParser.toJson(SPEC), PartitionSpecParser.toJson(table.spec()));
    Assert.assertEquals(Collections.singletonMap("dummy", "test"), table.properties());
    AssertHelpers.assertThrows("Should complain about table identifier not set", NullPointerException.class, "identifier not set", () -> Catalogs.dropTable(conf, new Properties()));
    Properties dropProperties = new Properties();
    dropProperties.put("name", identifier.toString());
    dropProperties.put(InputFormatConfig.CATALOG_NAME, defaultCatalogName);
    Catalogs.dropTable(conf, dropProperties);
    AssertHelpers.assertThrows("Should complain about table not found", NoSuchTableException.class, "Table does not exist", () -> Catalogs.loadTable(conf, dropProperties));
}
Also used: TableIdentifier (org.apache.iceberg.catalog.TableIdentifier), Table (org.apache.iceberg.Table), HadoopCatalog (org.apache.iceberg.hadoop.HadoopCatalog), CatalogProperties (org.apache.iceberg.CatalogProperties), Properties (java.util.Properties), Test (org.junit.Test)
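
Stripped of the negative-path assertions, the property-driven flow this test exercises needs three keys to create a table (identifier, schema, catalog name) and two to drop it. A hedged sketch of that happy path, assuming a catalog named "default" has already been registered in the Configuration, as the test's setCustomCatalogProperties does:

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SchemaParser;
import org.apache.iceberg.mr.Catalogs;
import org.apache.iceberg.mr.InputFormatConfig;

static void createThenDrop(Configuration conf, Schema schema) {
    // Create: identifier ("name"), JSON-serialized schema, and the target catalog.
    Properties createProps = new Properties();
    createProps.put("name", "test.table");
    createProps.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(schema));
    createProps.put(InputFormatConfig.CATALOG_NAME, "default");
    Catalogs.createTable(conf, createProps);

    // Drop: only the identifier and the catalog name are required.
    Properties dropProps = new Properties();
    dropProps.put("name", "test.table");
    dropProps.put(InputFormatConfig.CATALOG_NAME, "default");
    Catalogs.dropTable(conf, dropProps);
}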

Example 44 with TableIdentifier

Use of org.apache.iceberg.catalog.TableIdentifier in project incubator-gobblin by apache.

From the class IcebergMetadataWriter, method flush.

/**
 * For the flush of each table, we do the following:
 * 1. Commit the appendFiles if present
 * 2. Update the new table properties: high watermark of GMCE, data offset range, schema versions
 * 3. Update the schema
 * 4. Commit the transaction
 * 5. Reset tableMetadata for the next accumulation period
 * @param dbName database name of the table to flush
 * @param tableName name of the table to flush
 */
@Override
public void flush(String dbName, String tableName) throws IOException {
    Lock writeLock = readWriteLock.writeLock();
    writeLock.lock();
    try {
        TableIdentifier tid = TableIdentifier.of(dbName, tableName);
        TableMetadata tableMetadata = tableMetadataMap.getOrDefault(tid, new TableMetadata());
        if (tableMetadata.transaction.isPresent()) {
            Transaction transaction = tableMetadata.transaction.get();
            Map<String, String> props = tableMetadata.newProperties.or(Maps.newHashMap(tableMetadata.lastProperties.or(getIcebergTable(tid).properties())));
            if (tableMetadata.appendFiles.isPresent()) {
                tableMetadata.appendFiles.get().commit();
                if (tableMetadata.completenessEnabled) {
                    String topicName = props.get(TOPIC_NAME_KEY);
                    if (topicName == null) {
                        log.error(String.format("Not performing audit check. %s is null. Please set as table property of %s.%s", TOPIC_NAME_KEY, dbName, tableName));
                    } else {
                        long newCompletenessWatermark = computeCompletenessWatermark(topicName, tableMetadata.datePartitions, tableMetadata.prevCompletenessWatermark);
                        if (newCompletenessWatermark > tableMetadata.prevCompletenessWatermark) {
                            log.info(String.format("Updating %s for %s.%s to %s", COMPLETION_WATERMARK_KEY, dbName, tableName, newCompletenessWatermark));
                            props.put(COMPLETION_WATERMARK_KEY, String.valueOf(newCompletenessWatermark));
                            props.put(COMPLETION_WATERMARK_TIMEZONE_KEY, this.timeZone);
                            tableMetadata.newCompletenessWatermark = newCompletenessWatermark;
                        }
                    }
                }
            }
            if (tableMetadata.deleteFiles.isPresent()) {
                tableMetadata.deleteFiles.get().commit();
            }
            // Set high watermark
            Long highWatermark = tableCurrentWatermarkMap.get(tid);
            props.put(String.format(GMCE_HIGH_WATERMARK_KEY, tableTopicPartitionMap.get(tid)), highWatermark.toString());
            // Set low watermark
            props.put(String.format(GMCE_LOW_WATERMARK_KEY, tableTopicPartitionMap.get(tid)), tableMetadata.lowWatermark.get().toString());
            // Set whether to delete metadata files after commit
            props.put(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED, Boolean.toString(conf.getBoolean(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED, TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT)));
            props.put(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, Integer.toString(conf.getInt(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, TableProperties.METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT)));
            // Set data offset range
            boolean containOffsetRange = setDatasetOffsetRange(tableMetadata, props);
            String topicName = tableName;
            if (containOffsetRange) {
                String topicPartitionString = tableMetadata.dataOffsetRange.get().keySet().iterator().next();
                // In case the topic name is not the table name or the topic name contains '-'
                topicName = topicPartitionString.substring(0, topicPartitionString.lastIndexOf('-'));
            }
            // Update schema(commit)
            updateSchema(tableMetadata, props, topicName);
            // Update properties
            UpdateProperties updateProperties = transaction.updateProperties();
            props.forEach(updateProperties::set);
            updateProperties.commit();
            try (AutoCloseableHiveLock lock = this.locks.getTableLock(dbName, tableName)) {
                transaction.commitTransaction();
            }
            // Emit GTE for snapshot commits
            Snapshot snapshot = tableMetadata.table.get().currentSnapshot();
            Map<String, String> currentProps = tableMetadata.table.get().properties();
            submitSnapshotCommitEvent(snapshot, tableMetadata, dbName, tableName, currentProps, highWatermark);
            // Reset the table metadata for next accumulation period
            tableMetadata.reset(currentProps, highWatermark, tableMetadata.newCompletenessWatermark);
            log.info(String.format("Finish commit of new snapshot %s for table %s", snapshot.snapshotId(), tid.toString()));
        } else {
            log.info("There's no transaction initiated for the table {}", tid.toString());
        }
    } catch (RuntimeException e) {
        throw new RuntimeException(String.format("Fail to flush table %s %s", dbName, tableName), e);
    } catch (Exception e) {
        throw new IOException(String.format("Fail to flush table %s %s", dbName, tableName), e);
    } finally {
        writeLock.unlock();
    }
}
Also used: TableIdentifier (org.apache.iceberg.catalog.TableIdentifier), IOException (java.io.IOException), AlreadyExistsException (org.apache.iceberg.exceptions.AlreadyExistsException), SchemaRegistryException (org.apache.gobblin.metrics.kafka.SchemaRegistryException), NoSuchTableException (org.apache.iceberg.exceptions.NoSuchTableException), ReadWriteLock (java.util.concurrent.locks.ReadWriteLock), ReentrantReadWriteLock (java.util.concurrent.locks.ReentrantReadWriteLock), Lock (java.util.concurrent.locks.Lock), AutoCloseableHiveLock (org.apache.gobblin.hive.AutoCloseableHiveLock), HiveLock (org.apache.gobblin.hive.HiveLock), Snapshot (org.apache.iceberg.Snapshot), UpdateProperties (org.apache.iceberg.UpdateProperties), Transaction (org.apache.iceberg.Transaction)
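
The heart of flush() is a single Iceberg Transaction: file appends and deletes, property updates, and the schema change are staged individually and then made visible in one atomic commit. A reduced sketch of that pattern, assuming an already-loaded Table, a DataFile to append, and a map of properties to write:

import java.util.Map;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.Table;
import org.apache.iceberg.Transaction;
import org.apache.iceberg.UpdateProperties;

static void appendAndTagAtomically(Table table, DataFile dataFile, Map<String, String> props) {
    Transaction transaction = table.newTransaction();
    // Stage the file append; readers see nothing until the transaction commits.
    transaction.newAppend().appendFile(dataFile).commit();
    // Stage property updates (watermarks, offset ranges, ...) in the same transaction.
    UpdateProperties updateProperties = transaction.updateProperties();
    props.forEach(updateProperties::set);
    updateProperties.commit();
    // One atomic commit produces a single new snapshot.
    transaction.commitTransaction();
}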

Example 45 with TableIdentifier

Use of org.apache.iceberg.catalog.TableIdentifier in project incubator-gobblin by apache.

From the class IcebergMetadataWriter, method submitSnapshotCommitEvent.

private void submitSnapshotCommitEvent(Snapshot snapshot, TableMetadata tableMetadata, String dbName, String tableName, Map<String, String> props, Long highWaterMark) {
    GobblinEventBuilder gobblinTrackingEvent = new GobblinEventBuilder(IcebergMCEMetadataKeys.ICEBERG_COMMIT_EVENT_NAME);
    long currentSnapshotID = snapshot.snapshotId();
    long endToEndLag = System.currentTimeMillis() - tableMetadata.lowestGMCEEmittedTime;
    TableIdentifier tid = TableIdentifier.of(dbName, tableName);
    String gmceTopicPartition = tableTopicPartitionMap.get(tid);
    // Add information to automatically trigger a repair job when data loss happens
    gobblinTrackingEvent.addMetadata(IcebergMCEMetadataKeys.GMCE_TOPIC_NAME, gmceTopicPartition.split("-")[0]);
    gobblinTrackingEvent.addMetadata(IcebergMCEMetadataKeys.GMCE_TOPIC_PARTITION, gmceTopicPartition.split("-")[1]);
    gobblinTrackingEvent.addMetadata(IcebergMCEMetadataKeys.GMCE_HIGH_WATERMARK, highWaterMark.toString());
    gobblinTrackingEvent.addMetadata(IcebergMCEMetadataKeys.GMCE_LOW_WATERMARK, tableMetadata.lowWatermark.get().toString());
    // Add information for lag monitoring
    gobblinTrackingEvent.addMetadata(IcebergMCEMetadataKeys.LAG_KEY_NAME, Long.toString(endToEndLag));
    gobblinTrackingEvent.addMetadata(IcebergMCEMetadataKeys.SNAPSHOT_KEY_NAME, Long.toString(currentSnapshotID));
    gobblinTrackingEvent.addMetadata(IcebergMCEMetadataKeys.MANIFEST_LOCATION, snapshot.manifestListLocation());
    gobblinTrackingEvent.addMetadata(IcebergMCEMetadataKeys.SNAPSHOT_INFORMATION_KEY_NAME, Joiner.on(",").withKeyValueSeparator("=").join(snapshot.summary()));
    gobblinTrackingEvent.addMetadata(IcebergMCEMetadataKeys.TABLE_KEY_NAME, tableName);
    gobblinTrackingEvent.addMetadata(IcebergMCEMetadataKeys.DATABASE_KEY_NAME, dbName);
    gobblinTrackingEvent.addMetadata(IcebergMCEMetadataKeys.DATASET_HDFS_PATH, tableMetadata.datasetName);
    for (Map.Entry<String, String> entry : props.entrySet()) {
        if (entry.getKey().startsWith(OFFSET_RANGE_KEY_PREFIX)) {
            gobblinTrackingEvent.addMetadata(entry.getKey(), entry.getValue());
        }
    }
    eventSubmitter.submit(gobblinTrackingEvent);
}
Also used: TableIdentifier (org.apache.iceberg.catalog.TableIdentifier), GobblinEventBuilder (org.apache.gobblin.metrics.event.GobblinEventBuilder), Map (java.util.Map), HashMap (java.util.HashMap)
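
The event plumbing itself is small: build a GobblinEventBuilder, attach string metadata, and hand it to an EventSubmitter, exactly as the method above does. A stripped-down sketch, assuming an existing EventSubmitter; the event name and metadata keys below are placeholders for the IcebergMCEMetadataKeys constants used in the real writer:

import org.apache.gobblin.metrics.event.EventSubmitter;
import org.apache.gobblin.metrics.event.GobblinEventBuilder;

static void emitCommitEvent(EventSubmitter eventSubmitter, long snapshotId, String dbName, String tableName) {
    // Placeholder event name; the writer above uses IcebergMCEMetadataKeys.ICEBERG_COMMIT_EVENT_NAME.
    GobblinEventBuilder event = new GobblinEventBuilder("IcebergCommitEvent");
    event.addMetadata("snapshotId", Long.toString(snapshotId));
    event.addMetadata("dataset", dbName + "." + tableName);
    eventSubmitter.submit(event);
}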

Aggregations

Types most frequently used together with TableIdentifier across the indexed sources (usage counts):

TableIdentifier (org.apache.iceberg.catalog.TableIdentifier): 87
Test (org.junit.Test): 69
Table (org.apache.iceberg.Table): 56
PartitionSpec (org.apache.iceberg.PartitionSpec): 27
Schema (org.apache.iceberg.Schema): 25
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 16
BaseTable (org.apache.iceberg.BaseTable): 15
UpdateSchema (org.apache.iceberg.UpdateSchema): 15
List (java.util.List): 13
NoSuchTableException (org.apache.iceberg.exceptions.NoSuchTableException): 13
ArrayList (java.util.ArrayList): 11
ImmutableList (org.apache.iceberg.relocated.com.google.common.collect.ImmutableList): 11
IOException (java.io.IOException): 10
Map (java.util.Map): 10
Types (org.apache.iceberg.types.Types): 10
HashMap (java.util.HashMap): 9
Path (org.apache.hadoop.fs.Path): 9
TableProperties (org.apache.iceberg.TableProperties): 9
Collections (java.util.Collections): 8
Properties (java.util.Properties): 8