Example 1 with TableSchema

Use of org.apache.flink.table.api.TableSchema in project flink by apache.

From the class HivePartitionFetcherTest, the method testIgnoreNonExistPartition.

@Test
public void testIgnoreNonExistPartition() throws Exception {
    // It's possible that a partition path exists on the filesystem while the partition was
    // never added to HMS, e.g. the partition is still being loaded or the path is simply
    // misplaced. Make sure the fetcher ignores such paths.
    HiveCatalog hiveCatalog = HiveTestUtils.createHiveCatalog();
    hiveCatalog.open();
    // create test table
    String[] fieldNames = new String[] { "i", "date" };
    DataType[] fieldTypes = new DataType[] { DataTypes.INT(), DataTypes.STRING() };
    TableSchema schema = TableSchema.builder().fields(fieldNames, fieldTypes).build();
    List<String> partitionKeys = Collections.singletonList("date");
    Map<String, String> options = new HashMap<>();
    options.put("connector", "hive");
    CatalogTable catalogTable = new CatalogTableImpl(schema, partitionKeys, options, null);
    ObjectPath tablePath = new ObjectPath("default", "test");
    hiveCatalog.createTable(tablePath, catalogTable, false);
    // create a well-formed partition directory, but never register the partition in HMS
    Table hiveTable = hiveCatalog.getHiveTable(tablePath);
    Path path = new Path(hiveTable.getSd().getLocation(), "date=2021-06-18");
    FileSystem fs = path.getFileSystem(hiveCatalog.getHiveConf());
    fs.mkdirs(path);
    // test partition-time order
    Configuration flinkConf = new Configuration();
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_TIME);
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveCatalog.getHiveVersion());
    JobConfWrapper jobConfWrapper = new JobConfWrapper(new JobConf(hiveCatalog.getHiveConf()));
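    // __HIVE_DEFAULT_PARTITION__ is the name Hive substitutes for null partition values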
    String defaultPartName = "__HIVE_DEFAULT_PARTITION__";
    MyHivePartitionFetcherContext fetcherContext = new MyHivePartitionFetcherContext(tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test create-time order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.CREATE_TIME);
    fetcherContext = new MyHivePartitionFetcherContext(tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
    // test partition-name order
    flinkConf.set(STREAMING_SOURCE_PARTITION_ORDER, HiveOptions.PartitionOrder.PARTITION_NAME);
    fetcherContext = new MyHivePartitionFetcherContext(tablePath, hiveShim, jobConfWrapper, partitionKeys, fieldTypes, fieldNames, flinkConf, defaultPartName);
    fetcherContext.open();
    assertEquals(0, fetcherContext.getComparablePartitionValueList().size());
}
Also used:
java.util: HashMap
org.apache.flink.configuration: Configuration
org.apache.flink.connectors.hive: JobConfWrapper
org.apache.flink.table.api: TableSchema
org.apache.flink.table.catalog: CatalogTable, CatalogTableImpl, ObjectPath
org.apache.flink.table.catalog.hive: HiveCatalog
org.apache.flink.table.catalog.hive.client: HiveShim
org.apache.flink.table.types: DataType
org.apache.hadoop.fs: FileSystem, Path
org.apache.hadoop.hive.metastore.api: Table
org.apache.hadoop.mapred: JobConf
org.junit: Test

Example 2 with TableSchema

Use of org.apache.flink.table.api.TableSchema in project flink by apache.

From the class HiveCatalogHiveMetadataTest, the method testAlterTableColumnStatistics.

@Test
public void testAlterTableColumnStatistics() throws Exception {
    String hiveVersion = ((HiveCatalog) catalog).getHiveVersion();
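    // date column statistics are only supported by Hive 1.2.0 and later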
    boolean supportDateStats = hiveVersion.compareTo(HiveShimLoader.HIVE_VERSION_V1_2_0) >= 0;
    catalog.createDatabase(db1, createDb(), false);
    TableSchema.Builder builder =
            TableSchema.builder()
                    .field("first", DataTypes.STRING())
                    .field("second", DataTypes.INT())
                    .field("third", DataTypes.BOOLEAN())
                    .field("fourth", DataTypes.DOUBLE())
                    .field("fifth", DataTypes.BIGINT())
                    .field("sixth", DataTypes.BYTES())
                    .field("seventh", DataTypes.DECIMAL(10, 3))
                    .field("eighth", DataTypes.DECIMAL(30, 3));
    if (supportDateStats) {
        builder.field("ninth", DataTypes.DATE());
    }
    TableSchema tableSchema = builder.build();
    CatalogTable catalogTable = new CatalogTableImpl(tableSchema, getBatchTableProperties(), TEST_COMMENT);
    catalog.createTable(path1, catalogTable, false);
    Map<String, CatalogColumnStatisticsDataBase> columnStatisticsDataBaseMap = new HashMap<>();
    columnStatisticsDataBaseMap.put("first", new CatalogColumnStatisticsDataString(10L, 5.2, 3L, 100L));
    columnStatisticsDataBaseMap.put("second", new CatalogColumnStatisticsDataLong(0L, 1000L, 3L, 0L));
    columnStatisticsDataBaseMap.put("third", new CatalogColumnStatisticsDataBoolean(15L, 20L, 3L));
    columnStatisticsDataBaseMap.put("fourth", new CatalogColumnStatisticsDataDouble(15.02, 20.01, 3L, 10L));
    columnStatisticsDataBaseMap.put("fifth", new CatalogColumnStatisticsDataLong(0L, 20L, 3L, 2L));
    columnStatisticsDataBaseMap.put("sixth", new CatalogColumnStatisticsDataBinary(150L, 20D, 3L));
    columnStatisticsDataBaseMap.put("seventh", new CatalogColumnStatisticsDataDouble(1.23, 99.456, 100L, 0L));
    columnStatisticsDataBaseMap.put("eighth", new CatalogColumnStatisticsDataDouble(0.123, 123456.789, 5723L, 19L));
    if (supportDateStats) {
        columnStatisticsDataBaseMap.put("ninth", new CatalogColumnStatisticsDataDate(new Date(71L), new Date(17923L), 132L, 0L));
    }
    CatalogColumnStatistics catalogColumnStatistics = new CatalogColumnStatistics(columnStatisticsDataBaseMap);
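    // write the statistics through the catalog and verify they round-trip intact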
    catalog.alterTableColumnStatistics(path1, catalogColumnStatistics, false);
    checkEquals(catalogColumnStatistics, catalog.getTableColumnStatistics(path1));
}
Also used:
java.util: HashMap
org.apache.flink.table.api: TableSchema
org.apache.flink.table.catalog: CatalogTable, CatalogTableImpl
org.apache.flink.table.catalog.stats: CatalogColumnStatistics, CatalogColumnStatisticsDataBase, CatalogColumnStatisticsDataBinary, CatalogColumnStatisticsDataBoolean, CatalogColumnStatisticsDataDate, CatalogColumnStatisticsDataDouble, CatalogColumnStatisticsDataLong, CatalogColumnStatisticsDataString, Date
org.junit: Test

Example 3 with TableSchema

Use of org.apache.flink.table.api.TableSchema in project flink by apache.

From the class HiveCatalogITCase, the method testTableWithPrimaryKey.

@Test
public void testTableWithPrimaryKey() {
    TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
    tableEnv.getConfig().getConfiguration().setInteger(TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 1);
    tableEnv.registerCatalog("catalog1", hiveCatalog);
    tableEnv.useCatalog("catalog1");
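    // DDL with a computed column (ts), a watermark, and a named primary key constraint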
    final String createTable =
            "CREATE TABLE pk_src (\n"
                    + "  uuid varchar(40) not null,\n"
                    + "  price DECIMAL(10, 2),\n"
                    + "  currency STRING,\n"
                    + "  ts6 TIMESTAMP(6),\n"
                    + "  ts AS CAST(ts6 AS TIMESTAMP(3)),\n"
                    + "  WATERMARK FOR ts AS ts,\n"
                    + "  constraint ct1 PRIMARY KEY(uuid) NOT ENFORCED)\n"
                    + "  WITH (\n"
                    + "    'connector.type' = 'filesystem',"
                    + "    'connector.path' = 'file://fakePath',"
                    + "    'format.type' = 'csv')";
    tableEnv.executeSql(createTable);
    TableSchema tableSchema = tableEnv.getCatalog(tableEnv.getCurrentCatalog()).map(catalog -> {
        try {
            final ObjectPath tablePath = ObjectPath.fromString(catalog.getDefaultDatabase() + '.' + "pk_src");
            return catalog.getTable(tablePath).getSchema();
        } catch (TableNotExistException e) {
            return null;
        }
    }).orElse(null);
    assertThat(tableSchema).isNotNull();
    assertThat(tableSchema.getPrimaryKey()).hasValue(UniqueConstraint.primaryKey("ct1", Collections.singletonList("uuid")));
    tableEnv.executeSql("DROP TABLE pk_src");
}
Also used:
java.io: BufferedReader, ByteArrayOutputStream, File, FileReader, PrintStream
java.net: URI
java.nio.file: Files, Path, Paths
java.util: ArrayList, Arrays, Collections, Comparator, HashMap, List, Map
java.util.concurrent: Callable, ExecutorService, Executors, Future, TimeUnit
java.util.concurrent.atomic: AtomicReference
org.apache.flink.configuration: Configuration, CoreOptions
org.apache.flink.table.api: DataTypes, EnvironmentSettings, Expressions.$, Schema, SqlDialect, Table, TableEnvironment, TableSchema
org.apache.flink.table.api.config: ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM
org.apache.flink.table.api.constraints: UniqueConstraint
org.apache.flink.table.catalog: Catalog, CatalogBaseTable, CatalogPropertiesUtil.FLINK_PROPERTY_PREFIX, CatalogTable, CatalogTableImpl, CatalogView, ObjectIdentifier, ObjectPath
org.apache.flink.table.catalog.exceptions: TableNotExistException
org.apache.flink.table.factories: FactoryUtil, FactoryUtil.CONNECTOR, ManagedTableFactory, TestManagedTableFactory
org.apache.flink.table.planner.factories.utils: TestCollectionTableFactory
org.apache.flink.table.types: AbstractDataType
org.apache.flink.types: Row
org.apache.flink.util: CollectionUtil, FileUtils
org.assertj.core.api: Assertions.assertThat
org.junit: AfterClass, BeforeClass, Rule, Test
org.junit.rules: TemporaryFolder

Example 4 with TableSchema

Use of org.apache.flink.table.api.TableSchema in project flink by apache.

From the class HiveTableFactoryTest, the method testGenericTable.

@Test
public void testGenericTable() throws Exception {
    final TableSchema schema = TableSchema.builder().field("name", DataTypes.STRING()).field("age", DataTypes.INT()).build();
    catalog.createDatabase("mydb", new CatalogDatabaseImpl(new HashMap<>(), ""), true);
    final Map<String, String> options = Collections.singletonMap(FactoryUtil.CONNECTOR.key(), "COLLECTION");
    final CatalogTable table = new CatalogTableImpl(schema, options, "csv table");
    catalog.createTable(new ObjectPath("mydb", "mytable"), table, true);
    final Optional<TableFactory> tableFactoryOpt = catalog.getTableFactory();
    assertTrue(tableFactoryOpt.isPresent());
    final HiveTableFactory tableFactory = (HiveTableFactory) tableFactoryOpt.get();
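    // for a generic (non-Hive) table, the factory should yield the connector's stream source/sink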
    final TableSource tableSource = tableFactory.createTableSource(new TableSourceFactoryContextImpl(ObjectIdentifier.of("mycatalog", "mydb", "mytable"), table, new Configuration(), false));
    assertTrue(tableSource instanceof StreamTableSource);
    final TableSink tableSink = tableFactory.createTableSink(new TableSinkFactoryContextImpl(ObjectIdentifier.of("mycatalog", "mydb", "mytable"), table, new Configuration(), true, false));
    assertTrue(tableSink instanceof StreamTableSink);
}
Also used:
java.util: HashMap
org.apache.flink.configuration: Configuration
org.apache.flink.table.api: TableSchema
org.apache.flink.table.catalog: CatalogDatabaseImpl, CatalogTable, CatalogTableImpl, ObjectPath, ResolvedCatalogTable
org.apache.flink.table.connector.sink: DynamicTableSink
org.apache.flink.table.connector.source: DynamicTableSource
org.apache.flink.table.factories: TableFactory, TableSinkFactoryContextImpl, TableSourceFactoryContextImpl
org.apache.flink.table.sinks: StreamTableSink, TableSink
org.apache.flink.table.sources: StreamTableSource, TableSource
org.junit: Test

Example 5 with TableSchema

Use of org.apache.flink.table.api.TableSchema in project flink by apache.

From the class HiveCatalogGenericMetadataTest, the method testGenericTableSchema.

// ------ tables ------
@Test
public void testGenericTableSchema() throws Exception {
    catalog.createDatabase(db1, createDb(), false);
    TableSchema tableSchema =
            TableSchema.builder()
                    .fields(
                            new String[] { "col1", "col2", "col3" },
                            new DataType[] {
                                DataTypes.TIMESTAMP(3), DataTypes.TIMESTAMP(6), DataTypes.TIMESTAMP(9)
                            })
                    .watermark("col3", "col3", DataTypes.TIMESTAMP(9))
                    .build();
    ObjectPath tablePath = new ObjectPath(db1, "generic_table");
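    // the schema, including the watermark spec, must survive the catalog round trip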
    try {
        catalog.createTable(tablePath, new CatalogTableImpl(tableSchema, getBatchTableProperties(), TEST_COMMENT), false);
        assertEquals(tableSchema, catalog.getTable(tablePath).getSchema());
    } finally {
        catalog.dropTable(tablePath, true);
    }
}
Also used:
org.apache.flink.table.api: TableSchema
org.apache.flink.table.catalog: CatalogTableImpl, ObjectPath
org.junit: Test

Aggregations

TableSchema (org.apache.flink.table.api.TableSchema): 86 usages
Test (org.junit.Test): 54 usages
HashMap (java.util.HashMap): 26 usages
CatalogTableImpl (org.apache.flink.table.catalog.CatalogTableImpl): 21 usages
SqlNode (org.apache.calcite.sql.SqlNode): 19 usages
ObjectPath (org.apache.flink.table.catalog.ObjectPath): 19 usages
CatalogTable (org.apache.flink.table.catalog.CatalogTable): 18 usages
DataType (org.apache.flink.table.types.DataType): 16 usages
ValidationException (org.apache.flink.table.api.ValidationException): 14 usages
TableColumn (org.apache.flink.table.api.TableColumn): 10 usages
UniqueConstraint (org.apache.flink.table.api.constraints.UniqueConstraint): 10 usages
ArrayList (java.util.ArrayList): 9 usages
List (java.util.List): 9 usages
Map (java.util.Map): 9 usages
FeatureOption (org.apache.flink.sql.parser.ddl.SqlTableLike.FeatureOption): 9 usages
MergingStrategy (org.apache.flink.sql.parser.ddl.SqlTableLike.MergingStrategy): 9 usages
CatalogBaseTable (org.apache.flink.table.catalog.CatalogBaseTable): 8 usages
ObjectIdentifier (org.apache.flink.table.catalog.ObjectIdentifier): 8 usages
Arrays (java.util.Arrays): 7 usages
Configuration (org.apache.flink.configuration.Configuration): 7 usages
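
The aggregation reflects one recurring pattern across the examples above: build a TableSchema with DataTypes, wrap it in a CatalogTableImpl, and register it under an ObjectPath. Below is a minimal sketch of that pattern, not taken from the Flink sources; the catalog instance and all table names are placeholders, and note that TableSchema and CatalogTableImpl are deprecated in newer Flink releases in favor of Schema and CatalogTable.of.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.catalog.Catalog;
import org.apache.flink.table.catalog.CatalogTable;
import org.apache.flink.table.catalog.CatalogTableImpl;
import org.apache.flink.table.catalog.ObjectPath;

public class TableSchemaPatternSketch {

    // Register a partitioned table through any open Catalog and read its schema back.
    static TableSchema registerAndReadBack(Catalog catalog) throws Exception {
        // 1. Describe the columns with the (legacy) TableSchema builder.
        TableSchema schema =
                TableSchema.builder()
                        .field("id", DataTypes.INT())
                        .field("ds", DataTypes.STRING())
                        .build();

        // 2. Wrap schema, partition keys, connector options, and a comment in a CatalogTable.
        Map<String, String> options = new HashMap<>();
        options.put("connector", "COLLECTION");
        CatalogTable table =
                new CatalogTableImpl(schema, Collections.singletonList("ds"), options, "demo table");

        // 3. Register the table under database.table, as the examples above do.
        ObjectPath path = new ObjectPath("default", "demo");
        catalog.createTable(path, table, false);

        // 4. The schema, including field types, survives the catalog round trip.
        return catalog.getTable(path).getSchema();
    }
}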