
Example 1 with CatalogTableImpl

Use of org.apache.flink.table.catalog.CatalogTableImpl in the Apache Flink project.

From class HivePartitionFetcherTest, method testIgnoreNonExistPartition.

@Test
public void testIgnoreNonExistPartition() throws Exception {
    // it's possible a partition path exists but the partition is not added to HMS, e.g. the
    // partition is still being loaded, or the path is simply misplaced
    // make sure the fetcher can ignore such paths
    HiveCatalog hiveCatalog = HiveTestUtils.createHiveCatalog();
    hiveCatalog.open();
    // create test table
    String[] fieldNames = new String[] { "i", "date" };
    DataType[] fieldTypes = new DataType[] { DataTypes.INT(), DataTypes.STRING() };
    TableSchema schema = TableSchema.builder().fields(fieldNames, fieldTypes).build();
    List<String> partitionKeys = Collections.singletonList("date");
    Map<String, String> options = new HashMap<>();
    options.put("connector", "hive");
    CatalogTable catalogTable = new CatalogTableImpl(schema, partitionKeys, options, null);
    ObjectPath tablePath = new ObjectPath("default", "test");
    hiveCatalog.createTable(tablePath, catalogTable, false);
    // add a valid partition path
    Table hiveTable = hiveCatalog.getHiveTable(tablePath);
    Path path = new Path(hiveTable.getSd().getLocation(), "date=2021-06-18");
    FileSystem fs = path.getFileSystem(hiveCatalog.getHiveConf());
    fs.mkdirs(path);
    // test partition-time order
    Configuration flinkConf = new Configuration();
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_TIME);
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveCatalog.getHiveVersion());
    JobConfWrapper jobConfWrapper = new JobConfWrapper(new JobConf(hiveCatalog.getHiveConf()));
    String defaultPartName = "__HIVE_DEFAULT_PARTITION__";
    MyHivePartitionFetcherContext fetcherContext =
            new MyHivePartitionFetcherContext(
                    tablePath, hiveShim, jobConfWrapper, partitionKeys,
                    fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test create-time order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.CREATE_TIME);
    fetcherContext =
            new MyHivePartitionFetcherContext(
                    tablePath, hiveShim, jobConfWrapper, partitionKeys,
                    fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test partition-name order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_NAME);
    fetcherContext =
            new MyHivePartitionFetcherContext(
                    tablePath, hiveShim, jobConfWrapper, partitionKeys,
                    fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
}
Also used: Path (org.apache.hadoop.fs.Path), ObjectPath (org.apache.flink.table.catalog.ObjectPath), HiveCatalog (org.apache.flink.table.catalog.hive.HiveCatalog), CatalogTable (org.apache.flink.table.catalog.CatalogTable), Table (org.apache.hadoop.hive.metastore.api.Table), JobConfWrapper (org.apache.flink.connectors.hive.JobConfWrapper), TableSchema (org.apache.flink.table.api.TableSchema), Configuration (org.apache.flink.configuration.Configuration), HashMap (java.util.HashMap), CatalogTableImpl (org.apache.flink.table.catalog.CatalogTableImpl), FileSystem (org.apache.hadoop.fs.FileSystem), DataType (org.apache.flink.table.types.DataType), HiveShim (org.apache.flink.table.catalog.hive.client.HiveShim), JobConf (org.apache.hadoop.mapred.JobConf), Test (org.junit.Test)
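
Distilled from Example 1, here is a minimal, self-contained sketch of the CatalogTableImpl construction for a partitioned table. The class and method names in the sketch (PartitionedTableSketch, buildPartitionedTable) are illustrative only; the constructor arguments mirror the test above.

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.catalog.CatalogTable;
import org.apache.flink.table.catalog.CatalogTableImpl;

public class PartitionedTableSketch {
    public static CatalogTable buildPartitionedTable() {
        // schema with one value column and one partition column
        TableSchema schema = TableSchema.builder()
                .field("i", DataTypes.INT())
                .field("date", DataTypes.STRING())
                .build();
        // partition keys must also appear in the schema
        List<String> partitionKeys = Collections.singletonList("date");
        Map<String, String> options = new HashMap<>();
        options.put("connector", "hive");
        // CatalogTableImpl(schema, partitionKeys, options, comment); the comment may be null
        return new CatalogTableImpl(schema, partitionKeys, options, null);
    }
}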

Example 2 with CatalogTableImpl

Use of org.apache.flink.table.catalog.CatalogTableImpl in the Apache Flink project.

From class HiveCatalogHiveMetadataTest, method testCreateTableWithConstraints.

@Test
public void testCreateTableWithConstraints() throws Exception {
    // the constraints tested here require Hive 3.1 or later
    Assume.assumeTrue(HiveVersionTestUtil.HIVE_310_OR_LATER);
    HiveCatalog hiveCatalog = (HiveCatalog) catalog;
    hiveCatalog.createDatabase(db1, createDb(), false);
    TableSchema.Builder builder = TableSchema.builder();
    // primary-key columns ("x" here) must be declared NOT NULL
    builder.fields(
            new String[] { "x", "y", "z" },
            new DataType[] { DataTypes.INT().notNull(), DataTypes.TIMESTAMP(9).notNull(), DataTypes.BIGINT() });
    builder.primaryKey("pk_name", new String[] { "x" });
    hiveCatalog.createTable(path1, new CatalogTableImpl(builder.build(), getBatchTableProperties(), null), false);
    CatalogTable catalogTable = (CatalogTable) hiveCatalog.getTable(path1);
    assertTrue("PK not present", catalogTable.getSchema().getPrimaryKey().isPresent());
    UniqueConstraint pk = catalogTable.getSchema().getPrimaryKey().get();
    assertEquals("pk_name", pk.getName());
    assertEquals(Collections.singletonList("x"), pk.getColumns());
    assertFalse(catalogTable.getSchema().getFieldDataTypes()[0].getLogicalType().isNullable());
    assertFalse(catalogTable.getSchema().getFieldDataTypes()[1].getLogicalType().isNullable());
    assertTrue(catalogTable.getSchema().getFieldDataTypes()[2].getLogicalType().isNullable());
    hiveCatalog.dropDatabase(db1, false, true);
}
Also used: TableSchema (org.apache.flink.table.api.TableSchema), CatalogTableImpl (org.apache.flink.table.catalog.CatalogTableImpl), UniqueConstraint (org.apache.flink.table.api.constraints.UniqueConstraint), CatalogTable (org.apache.flink.table.catalog.CatalogTable), Test (org.junit.Test)
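
The constraint handling in Example 2 reduces to TableSchema.Builder.primaryKey plus NOT NULL column types. A minimal sketch, assuming only the public TableSchema and CatalogTableImpl APIs shown above; the class name PrimaryKeySketch is illustrative, and the empty options map is a simplification (a real Hive table would carry connector properties).

import java.util.Collections;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.catalog.CatalogTable;
import org.apache.flink.table.catalog.CatalogTableImpl;

public class PrimaryKeySketch {
    public static CatalogTable buildTableWithPrimaryKey() {
        TableSchema.Builder builder = TableSchema.builder();
        // primary-key columns must be NOT NULL
        builder.field("x", DataTypes.INT().notNull());
        builder.field("y", DataTypes.BIGINT());
        // a named primary key on column "x"
        builder.primaryKey("pk_name", new String[] { "x" });
        return new CatalogTableImpl(builder.build(), Collections.emptyMap(), null);
    }
}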

Example 3 with CatalogTableImpl

Use of org.apache.flink.table.catalog.CatalogTableImpl in the Apache Flink project.

From class HiveCatalogHiveMetadataTest, method checkStatistics.

private void checkStatistics(int inputStat, int expectStat) throws Exception {
    catalog.dropTable(path1, true);
    Map<String, String> properties = new HashMap<>();
    properties.put(FactoryUtil.CONNECTOR.key(), SqlCreateHiveTable.IDENTIFIER);
    // Hive keeps basic table statistics as plain table parameters, so the
    // test seeds them through the catalog table's properties map
    properties.put(StatsSetupConst.ROW_COUNT, String.valueOf(inputStat));
    properties.put(StatsSetupConst.NUM_FILES, String.valueOf(inputStat));
    properties.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(inputStat));
    properties.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(inputStat));
    CatalogTable catalogTable = new CatalogTableImpl(TableSchema.builder().field("f0", DataTypes.INT()).build(), properties, "");
    catalog.createTable(path1, catalogTable, false);
    CatalogTableStatistics statistics = catalog.getTableStatistics(path1);
    assertEquals(expectStat, statistics.getRowCount());
    assertEquals(expectStat, statistics.getFileCount());
    assertEquals(expectStat, statistics.getRawDataSize());
    assertEquals(expectStat, statistics.getTotalSize());
}
Also used: HashMap (java.util.HashMap), CatalogTableImpl (org.apache.flink.table.catalog.CatalogTableImpl), CatalogColumnStatisticsDataString (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString), CatalogTable (org.apache.flink.table.catalog.CatalogTable), CatalogTableStatistics (org.apache.flink.table.catalog.stats.CatalogTableStatistics)
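
For reference, the statistics-seeding part of Example 3 can be isolated into a small helper that builds only the properties map. The class and method names (StatsPropertiesSketch, statsProperties) are illustrative; the StatsSetupConst keys are the ones used in the test above.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.common.StatsSetupConst;

public class StatsPropertiesSketch {
    // builds the table parameters Hive interprets as basic table statistics
    public static Map<String, String> statsProperties(
            long rowCount, long numFiles, long totalSize, long rawDataSize) {
        Map<String, String> props = new HashMap<>();
        props.put(StatsSetupConst.ROW_COUNT, String.valueOf(rowCount));
        props.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles));
        props.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(totalSize));
        props.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(rawDataSize));
        return props;
    }
}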

Example 4 with CatalogTableImpl

Use of org.apache.flink.table.catalog.CatalogTableImpl in the Apache Flink project.

From class HiveCatalogHiveMetadataTest, method testAlterTableColumnStatistics.

@Test
public void testAlterTableColumnStatistics() throws Exception {
    String hiveVersion = ((HiveCatalog) catalog).getHiveVersion();
    // date column statistics are only supported from Hive 1.2.0 onwards
    boolean supportDateStats = hiveVersion.compareTo(HiveShimLoader.HIVE_VERSION_V1_2_0) >= 0;
    catalog.createDatabase(db1, createDb(), false);
    TableSchema.Builder builder =
            TableSchema.builder()
                    .field("first", DataTypes.STRING())
                    .field("second", DataTypes.INT())
                    .field("third", DataTypes.BOOLEAN())
                    .field("fourth", DataTypes.DOUBLE())
                    .field("fifth", DataTypes.BIGINT())
                    .field("sixth", DataTypes.BYTES())
                    .field("seventh", DataTypes.DECIMAL(10, 3))
                    .field("eighth", DataTypes.DECIMAL(30, 3));
    if (supportDateStats) {
        builder.field("ninth", DataTypes.DATE());
    }
    TableSchema tableSchema = builder.build();
    CatalogTable catalogTable = new CatalogTableImpl(tableSchema, getBatchTableProperties(), TEST_COMMENT);
    catalog.createTable(path1, catalogTable, false);
    Map<String, CatalogColumnStatisticsDataBase> columnStatisticsDataBaseMap = new HashMap<>();
    columnStatisticsDataBaseMap.put("first", new CatalogColumnStatisticsDataString(10L, 5.2, 3L, 100L));
    columnStatisticsDataBaseMap.put("second", new CatalogColumnStatisticsDataLong(0L, 1000L, 3L, 0L));
    columnStatisticsDataBaseMap.put("third", new CatalogColumnStatisticsDataBoolean(15L, 20L, 3L));
    columnStatisticsDataBaseMap.put("fourth", new CatalogColumnStatisticsDataDouble(15.02, 20.01, 3L, 10L));
    columnStatisticsDataBaseMap.put("fifth", new CatalogColumnStatisticsDataLong(0L, 20L, 3L, 2L));
    columnStatisticsDataBaseMap.put("sixth", new CatalogColumnStatisticsDataBinary(150L, 20D, 3L));
    columnStatisticsDataBaseMap.put("seventh", new CatalogColumnStatisticsDataDouble(1.23, 99.456, 100L, 0L));
    columnStatisticsDataBaseMap.put("eighth", new CatalogColumnStatisticsDataDouble(0.123, 123456.789, 5723L, 19L));
    if (supportDateStats) {
        columnStatisticsDataBaseMap.put("ninth", new CatalogColumnStatisticsDataDate(new Date(71L), new Date(17923L), 132L, 0L));
    }
    CatalogColumnStatistics catalogColumnStatistics = new CatalogColumnStatistics(columnStatisticsDataBaseMap);
    catalog.alterTableColumnStatistics(path1, catalogColumnStatistics, false);
    checkEquals(catalogColumnStatistics, catalog.getTableColumnStatistics(path1));
}
Also used: CatalogColumnStatisticsDataDate (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate), TableSchema (org.apache.flink.table.api.TableSchema), CatalogColumnStatisticsDataBase (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBase), HashMap (java.util.HashMap), CatalogColumnStatisticsDataString (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString), CatalogTable (org.apache.flink.table.catalog.CatalogTable), CatalogColumnStatisticsDataBinary (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary), Date (org.apache.flink.table.catalog.stats.Date), CatalogColumnStatistics (org.apache.flink.table.catalog.stats.CatalogColumnStatistics), CatalogColumnStatisticsDataLong (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong), CatalogColumnStatisticsDataDouble (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble), CatalogTableImpl (org.apache.flink.table.catalog.CatalogTableImpl), CatalogColumnStatisticsDataBoolean (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBoolean), Test (org.junit.Test)
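
The column-statistics objects in Example 4 are simple per-type value holders keyed by column name. A minimal sketch covering the two most common cases follows; the class name ColumnStatsSketch is illustrative, and the commented parameter meanings are inferred from the constructor order used in the test above.

import java.util.HashMap;
import java.util.Map;
import org.apache.flink.table.catalog.stats.CatalogColumnStatistics;
import org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBase;
import org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong;
import org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString;

public class ColumnStatsSketch {
    public static CatalogColumnStatistics buildColumnStats() {
        Map<String, CatalogColumnStatisticsDataBase> colStats = new HashMap<>();
        // string column: max length, average length, distinct count, null count
        colStats.put("first", new CatalogColumnStatisticsDataString(10L, 5.2, 3L, 100L));
        // long column: min, max, distinct count, null count
        colStats.put("second", new CatalogColumnStatisticsDataLong(0L, 1000L, 3L, 0L));
        return new CatalogColumnStatistics(colStats);
    }
}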

Example 5 with CatalogTableImpl

Use of org.apache.flink.table.catalog.CatalogTableImpl in the Apache Flink project.

From class HiveCatalogTest, method testAlterHiveTableToFlinkManagedTable.

@Test
public void testAlterHiveTableToFlinkManagedTable() throws Exception {
    Map<String, String> originOptions = getLegacyFileSystemConnectorOptions("/test_path");
    originOptions.put(FactoryUtil.CONNECTOR.key(), SqlCreateHiveTable.IDENTIFIER);
    CatalogTable originTable = new CatalogTableImpl(schema, originOptions, "Hive table");
    hiveCatalog.createTable(tablePath, originTable, false);
    Map<String, String> newOptions = Collections.emptyMap();
    CatalogTable newTable = new CatalogTableImpl(schema, newOptions, "Flink managed table");
    assertThatThrownBy(() -> hiveCatalog.alterTable(tablePath, newTable, false))
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining(
                    "Changing catalog table type is not allowed. "
                            + "Existing table type is 'HIVE_TABLE', but new table type is 'FLINK_MANAGED_TABLE'");
}
Also used: CatalogTableImpl (org.apache.flink.table.catalog.CatalogTableImpl), CatalogTable (org.apache.flink.table.catalog.CatalogTable), Test (org.junit.Test)
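
The failure in Example 5 comes from how the catalog infers a table's type from its options: FactoryUtil.CONNECTOR.key() resolves to "connector" and SqlCreateHiveTable.IDENTIFIER to "hive", while an options map without a connector key is treated as a Flink-managed table (as the error message in the test shows). A minimal sketch of the two option sets, with the class name TableTypeOptionsSketch as an illustrative placeholder:

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

public class TableTypeOptionsSketch {
    public static Map<String, String> hiveTableOptions() {
        Map<String, String> options = new HashMap<>();
        // equivalent to options.put(FactoryUtil.CONNECTOR.key(), SqlCreateHiveTable.IDENTIFIER)
        options.put("connector", "hive");
        return options;
    }

    public static Map<String, String> flinkManagedTableOptions() {
        // no "connector" key: the catalog treats the table as FLINK_MANAGED_TABLE
        return Collections.emptyMap();
    }
}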

Aggregations

CatalogTableImpl (org.apache.flink.table.catalog.CatalogTableImpl): 39 usages
CatalogTable (org.apache.flink.table.catalog.CatalogTable): 26 usages
Test (org.junit.Test): 24 usages
TableSchema (org.apache.flink.table.api.TableSchema): 21 usages
HashMap (java.util.HashMap): 20 usages
ObjectPath (org.apache.flink.table.catalog.ObjectPath): 19 usages
CatalogBaseTable (org.apache.flink.table.catalog.CatalogBaseTable): 7 usages
Configuration (org.apache.flink.configuration.Configuration): 6 usages
LinkedHashMap (java.util.LinkedHashMap): 5 usages
ValidationException (org.apache.flink.table.api.ValidationException): 5 usages
UniqueConstraint (org.apache.flink.table.api.constraints.UniqueConstraint): 5 usages
AlterTableSchemaOperation (org.apache.flink.table.operations.ddl.AlterTableSchemaOperation): 5 usages
TableColumn (org.apache.flink.table.api.TableColumn): 4 usages
ObjectIdentifier (org.apache.flink.table.catalog.ObjectIdentifier): 4 usages
Table (org.apache.hadoop.hive.metastore.api.Table): 4 usages
ArrayList (java.util.ArrayList): 3 usages
SqlCreateHiveTable (org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveTable): 3 usages
ResolvedCatalogTable (org.apache.flink.table.catalog.ResolvedCatalogTable): 3 usages
IOException (java.io.IOException): 2 usages
Path (java.nio.file.Path): 2 usages